Skip to content

Commit

Permalink
surface a problem in current loadHF: it realizes too late that stream…
Browse files Browse the repository at this point in the history
…ing should be False. This happens in both cases: when split is a str, and when split=None.

Signed-off-by: dafnapension <[email protected]>
  • Loading branch information
dafnapension committed Feb 17, 2025
1 parent fe79da3 commit d9337c3
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions tests/library/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unittest.mock import patch

import pandas as pd
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
from unitxt.error_utils import UnitxtError
from unitxt.loaders import (
LoadCSV,
Expand Down Expand Up @@ -232,6 +233,23 @@ def test_load_from_HF_multiple_innvocation_with_filter(self):
) # that HF dataset only has the 'test' split
self.assertEqual(instance["language"], "eng")

def test_load_HF_lazily(self):
    """Check that datasets returned by ``LoadHF.load_dataset`` can be iterated.

    The loader is created with ``streaming=True`` and ``load_dataset`` is
    called twice: once with a named split and once with ``split=None``.
    In both cases the returned object is type-checked and then the first
    example of every returned dataset is actually fetched, so a problem
    that only shows up on iteration surfaces here instead of later.
    """
    lazy_loader = LoadHF(path="ibm/finqa", streaming=True)

    # Case 1: a single, named split.
    dataset = lazy_loader.load_dataset(split="test")
    self.assertIsInstance(dataset, (Dataset, IterableDataset))
    # At this point load_dataset completed OK (per the original author's
    # note: after having changed streaming from True to False). Now touch
    # the returned dataset, which in current main is only done by the split
    # generator when yielding.
    # The ``None`` default turns an empty dataset into a clean assertion
    # failure rather than a bare StopIteration error.
    first_example = next(iter(dataset), None)
    self.assertIsNotNone(first_example)

    # Case 2: the same goes when split=None.
    dataset = lazy_loader.load_dataset(split=None)
    self.assertIsInstance(dataset, (DatasetDict, IterableDatasetDict))
    # Again load_dataset completed OK; now touch the samples of every split
    # in the returned mapping.
    for split_name, split_dataset in dataset.items():
        # subTest names the offending split in the report if one fails.
        with self.subTest(split=split_name):
            first_example = next(iter(split_dataset), None)
            self.assertIsNotNone(first_example)

def test_load_from_HF_split(self):
loader = LoadHF(path="sst2", split="train")
ms = loader()
Expand Down

0 comments on commit d9337c3

Please sign in to comment.