Skip to content

Commit

Permalink
Add JSON loader (#296)
Browse files Browse the repository at this point in the history
  • Loading branch information
tlzhu19 authored Dec 1, 2021
1 parent 455d725 commit 1d1600a
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 0 deletions.
1 change: 1 addition & 0 deletions pardata/_high_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def load_dataset_from_location(url_or_path: Union[str, typing_.PathLike], *,
RegexFormatPair(regex=r'.*\.(txt|log)', format='text/plain'),
RegexFormatPair(regex=r'.*\.(jpg|jpeg)', format='image/jpeg'),
RegexFormatPair(regex=r'.*\.png', format='image/png'),
RegexFormatPair(regex=r'.*\.json', format='json'),
)

for regex_format_pair in regex_format_pairs:
Expand Down
2 changes: 2 additions & 0 deletions pardata/loaders/_format_loader_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .image import PillowLoader
from .text import PlainTextLoader
from .table import CSVPandasLoader
from .json import JSONLoader


class FormatLoaderMap:
Expand Down Expand Up @@ -79,6 +80,7 @@ def __contains__(self, fmt: str) -> bool:
'image/jpeg': PillowLoader(),
'image/png': PillowLoader(),
'audio/wav': WaveLoader(),
'json': JSONLoader(),
})


Expand Down
45 changes: 45 additions & 0 deletions pardata/loaders/_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#
# Copyright 2021 IBM Corp. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"JSON file loaders."


from typing import cast, Dict, Union, Any
import json

from .. import typing as typing_
from ..schema import SchemaDict
from ._base import Loader


class JSONLoader(Loader):
    """Loads a JSON file to an object representing the data."""

    def load(self, path: Union[typing_.PathLike, Dict[str, str]], options: SchemaDict) -> Any:
        """Read the JSON document stored at ``path`` and return the decoded Python object.

        :param path: The path to the JSON file.
        :param options: Not used by this loader itself; it is only forwarded to the base class ``load``.
        :raises TypeError: ``path`` is not a path-like object.
        :raises OSError: The file at ``path`` can't be opened.
        :raises json.JSONDecodeError: The file content is not a valid JSON document.
        :return: An object representing loaded data. See :func:`json.load` for details.
        """

        super().load(path, options)

        # We can remove usage of cast once Dict[str, str] handling is added
        path = cast(typing_.PathLike, path)

        # Decode explicitly as UTF-8 (the encoding mandated for JSON interchange by RFC 8259) rather than relying on
        # the platform's locale-dependent default, which would mis-decode non-ASCII content on e.g. Windows.
        with open(path, encoding='utf-8') as json_file:
            return json.load(json_file)
22 changes: 22 additions & 0 deletions pardata/loaders/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#
# Copyright 2021 IBM Corp. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"JSON loaders."


from ._json import JSONLoader

# Public names of this module: it only re-exports the loader imported from ``._json`` above.
__all__ = ('JSONLoader',)
17 changes: 17 additions & 0 deletions tests/assets/people.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[
{
"name": "Alice",
"age": 25,
"state": "California"
},
{
"name": "Bob",
"age": 21,
"state": "Florida"
},
{
"name": "Carol",
"age": 28,
"state": "Texas"
}
]
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,3 +409,10 @@ def bell_sound(asset_dir) -> Path:
"Path to the service-bell.wav."

return asset_dir / 'service-bell.wav'


@pytest.fixture
def people_json(asset_dir) -> Path:
    "Location of the people.json asset under the asset directory."

    json_asset = asset_dir / 'people.json'
    return json_asset
14 changes: 14 additions & 0 deletions tests/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from pandas.api.types import is_datetime64_any_dtype, is_float_dtype, is_integer_dtype, is_string_dtype
from PIL import Image, ImageChops
import wave
import json

from pardata.dataset import Dataset
from pardata.loaders import Loader
Expand All @@ -31,6 +32,7 @@
from pardata.loaders.image import PillowLoader
from pardata.loaders.text import PlainTextLoader
from pardata.loaders.table import CSVPandasLoader
from pardata.loaders.json import JSONLoader


class TestBaseLoader:
Expand Down Expand Up @@ -339,3 +341,15 @@ def test_csv_pandas_header(self, tmp_path, noaa_jfk_schema):

del noaa_jfk_schema['subdatasets']['jfk_weather_cleaned']['format']['options']['no_header']
self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)


class TestJSONLoaders:
    def test_json_loader(self, people_json):
        "JSONLoader must yield the same object as parsing the file directly with the json module."

        # Object produced by the loader under test.
        actual = JSONLoader().load(people_json, {})

        # Reference object obtained by reading the same file with json.load.
        with open(people_json) as reference_file:
            expected = json.load(reference_file)

        assert expected == actual

0 comments on commit 1d1600a

Please sign in to comment.