Skip to content

Commit

Permalink
Merge pull request #69 from m3dev/pass-file-processor-as-parameter
Browse files (browse the repository at this point in the history)
make it possible to pass file processor
  • Loading branch information
nishiba authored Jul 16, 2019
2 parents 2d53494 + 122380d commit 4cba855
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 7 deletions.
10 changes: 5 additions & 5 deletions gokart/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ def load(file_path: str) -> pd.DataFrame:
return pd.concat([pd.read_pickle(file_path) for file_path in glob(os.path.join(dir_path, 'data_*.pkl'))])


# Diff hunk from gokart/target.py. The pre-commit and post-commit versions of
# `_make_file_system_target` are interleaved below; the +/- markers and the
# indentation were lost when the page was captured.
#
# Purpose (both versions): build the luigi target for `file_path` -- an
# S3Target when the path starts with 's3://', otherwise a LocalTarget. In both
# cases the target's `format` comes from `processor.format()`.
#
# [removed by this commit] old signature: the processor was always derived
# from the file path via `make_file_processor`.
def _make_file_system_target(file_path: str) -> luigi.target.FileSystemTarget:
processor = make_file_processor(file_path)
# [added by this commit] new signature: the caller may supply `processor`;
# when it is omitted (None) the processor is still derived from the file path.
# NOTE(review): `processor or ...` falls back on any falsy value, not only
# None -- confirm no FileProcessor implementation can evaluate as falsy.
def _make_file_system_target(file_path: str, processor: Optional[FileProcessor] = None) -> luigi.target.FileSystemTarget:
processor = processor or make_file_processor(file_path)
# [unchanged context] body shared by the old and the new version.
if file_path.startswith('s3://'):
return luigi.contrib.s3.S3Target(file_path, client=S3Config().get_s3_client(), format=processor.format())
return luigi.LocalTarget(file_path, format=processor.format())
Expand All @@ -160,10 +160,10 @@ def _get_last_modification_time(path: str) -> datetime:
return datetime.fromtimestamp(os.path.getmtime(path))


# Diff hunk from gokart/target.py. The pre-commit and post-commit versions of
# `make_target` are interleaved below; the +/- markers and the indentation
# were lost when the page was captured.
#
# Purpose (both versions): resolve the final path with `_make_file_path`,
# build the file-system target for it, and wrap both the target and the
# processor in a `SingleFileTarget`.
#
# [removed by this commit] old signature, without a `processor` parameter.
def make_target(file_path: str, unique_id: Optional[str] = None) -> TargetOnKart:
# [added by this commit] new signature: optional `processor` added as the last
# parameter with a None default, so existing callers keep working.
def make_target(file_path: str, unique_id: Optional[str] = None, processor: Optional[FileProcessor] = None) -> TargetOnKart:
# [unchanged context]
file_path = _make_file_path(file_path, unique_id)
# [removed by this commit] processor always derived from the path, and the
# file-system target built without being told which processor to use.
processor = make_file_processor(file_path)
file_system_target = _make_file_system_target(file_path)
# [added by this commit] a caller-supplied processor takes precedence; the
# resolved processor is forwarded so the file-system target and the returned
# SingleFileTarget use the same one.
# NOTE(review): `processor or ...` falls back on any falsy value, not only
# None -- confirm no FileProcessor implementation can evaluate as falsy.
processor = processor or make_file_processor(file_path)
file_system_target = _make_file_system_target(file_path, processor=processor)
# [unchanged context]
return SingleFileTarget(target=file_system_target, processor=processor)


Expand Down
5 changes: 3 additions & 2 deletions gokart/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

import gokart
from gokart.file_processor import FileProcessor
from gokart.target import TargetOnKart

logger = getLogger(__name__)
Expand Down Expand Up @@ -95,10 +96,10 @@ def clone(self, cls=None, **kwargs):

return cls(**new_k)

# Diff hunk from gokart/task.py. The pre-commit and post-commit versions of
# the task-level `make_target` method are interleaved below; the +/- markers
# and the indentation were lost when the page was captured.
#
# Purpose (both versions): join `relative_file_path` onto the task's
# `workspace_directory`, optionally compute a unique id for the task, and
# delegate to `gokart.target.make_target`.
#
# [removed by this commit] old signature, without a `processor` parameter.
def make_target(self, relative_file_path: str, use_unique_id: bool = True) -> TargetOnKart:
# [added by this commit] new signature: optional `processor`, default None.
# NOTE(review): `Optional` must be in scope in gokart/task.py; its import is
# not visible in this hunk -- confirm.
def make_target(self, relative_file_path: str, use_unique_id: bool = True, processor: Optional[FileProcessor] = None) -> TargetOnKart:
# [unchanged context]
file_path = os.path.join(self.workspace_directory, relative_file_path)
unique_id = self.make_unique_id() if use_unique_id else None
# [removed by this commit] delegation without a processor.
return gokart.target.make_target(file_path=file_path, unique_id=unique_id)
# [added by this commit] the processor is passed through unchanged (None
# included) to `gokart.target.make_target`.
return gokart.target.make_target(file_path=file_path, unique_id=unique_id, processor=processor)

def make_large_data_frame_target(self, relative_file_path: str, use_unique_id: bool = True, max_byte=int(2**26)) -> TargetOnKart:
file_path = os.path.join(self.workspace_directory, relative_file_path)
Expand Down
8 changes: 8 additions & 0 deletions test/test_task_on_kart.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from luigi.util import inherits

import gokart
from gokart.file_processor import FileProcessor, XmlFileProcessor
from gokart.target import TargetOnKart, SingleFileTarget, ModelTarget


Expand Down Expand Up @@ -111,6 +112,13 @@ def test_make_target_without_id(self):
path = _DummyTask().make_target('test.txt', use_unique_id=False)._target.path
self.assertEqual(path, os.path.join(_DummyTask().workspace_directory, 'test.txt'))

# Diff hunk from test/test_task_on_kart.py. Every line of this test method was
# added by the commit; the indentation and the enclosing test class are not
# visible in this capture.
#
# Checks that a processor handed to the task's `make_target` is the one stored
# on the returned target, and that the target is a SingleFileTarget.
def test_make_target_with_processor(self):
task = _DummyTask()
processor = XmlFileProcessor()
# NOTE(review): 'test.dummy' is presumably chosen so that the processor cannot
# be inferred from the file extension -- confirm against make_file_processor.
target = task.make_target('test.dummy', processor=processor)
# Reads the private `_processor` attribute to verify the processor was kept.
self.assertEqual(target._processor, processor)
self.assertIsInstance(target, SingleFileTarget)

def test_compare_targets_of_different_tasks(self):
path1 = _DummyTask(param=1).make_target('test.txt')._target.path
path2 = _DummyTask(param=2).make_target('test.txt')._target.path
Expand Down

0 comments on commit 4cba855

Please sign in to comment.