Skip to content

Commit

Permalink
webdataset keys in last position
Browse files (browse the repository at this point in the history)
  • Loading branch information
lhoestq committed Jan 3, 2025
1 parent 3924b7d commit c1ca21e
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/datasets/packaged_modules/webdataset/webdataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,8 +36,6 @@ def _get_pipeline_from_tar(cls, tar_path, tar_iterator):
if current_example and current_example["__key__"] != example_key:
yield current_example
current_example = {}
- current_example["__key__"] = example_key
- current_example["__url__"] = tar_path
current_example[field_name.lower()] = f.read()
if field_name.split(".")[-1] in SINGLE_FILE_COMPRESSION_EXTENSION_TO_PROTOCOL:
fs.write_bytes(filename, current_example[field_name.lower()])
Expand All @@ -50,6 +48,8 @@ def _get_pipeline_from_tar(cls, tar_path, tar_iterator):
data_extension = field_name.split(".")[-1]
if data_extension in cls.DECODERS:
current_example[field_name] = cls.DECODERS[data_extension](current_example[field_name])
+ current_example["__key__"] = example_key
+ current_example["__url__"] = tar_path
if current_example:
yield current_example

Expand Down

0 comments on commit c1ca21e

Please sign in to comment.