diff --git a/.vscode/launch.json b/.vscode/launch.json index 7850542..13711a1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,7 +10,40 @@ "request": "launch", "program": "${workspaceFolder}/file_merger.py", "args": [ - "armel" + "-i", + "${workspaceFolder}\\dataset\\small_dataset", + "-f", + "${workspaceFolder}\\dataset\\output.txt" + ], + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Python: Generate Fake Dataset", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/setup.py", + "args": [ + "generate_fake_dataset", + "--num-files", + "5", + "--min-words-per-file", + "100", + "--max-words-per-file", + "500", + "--output-dir", + "dataset/small_dataset" + ], + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Python: Coverage Report", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/setup.py", + "args": [ + "coverage" ], "console": "integratedTerminal", "justMyCode": true diff --git a/.vscode/settings.json b/.vscode/settings.json index 958bfe7..bf833fa 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,5 +9,8 @@ "python.testing.pytestEnabled": false, "python.testing.unittestEnabled": true, "python.linting.pylintEnabled": true, - "python.linting.enabled": true + "python.linting.enabled": true, + "python.formatting.autopep8Args": [ + "--max-line-length=100" + ] } \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/merge_files/mergers/async_.py b/merge_files/mergers/async_.py index ba65f91..f635d3a 100644 --- a/merge_files/mergers/async_.py +++ b/merge_files/mergers/async_.py @@ -1,7 +1,7 @@ import asyncio import shutil -from .base import FileMerger +from merge_files.mergers.base import FileMerger class AsyncFileMerger(FileMerger): diff --git a/merge_files/mergers/basic.py b/merge_files/mergers/basic.py index 2a34830..cd34257 100644 --- a/merge_files/mergers/basic.py +++ b/merge_files/mergers/basic.py @@ -1,4 +1,4 @@ -from .base import FileMerger +from merge_files.mergers.base import FileMerger class BasicFileMerger(FileMerger): diff --git a/merge_files/mergers/parallel.py b/merge_files/mergers/parallel.py index 0b32107..83bccea 100644 --- a/merge_files/mergers/parallel.py +++ b/merge_files/mergers/parallel.py @@ -3,7 +3,7 @@ import shutil from typing import List -from .base import FileMerger +from merge_files.mergers.base import FileMerger class ParallelFileMerger(FileMerger): diff --git a/merge_files/utils.py b/merge_files/utils.py index 696f031..7e79fb9 100644 --- a/merge_files/utils.py +++ b/merge_files/utils.py @@ -43,7 +43,7 @@ def check_valid_path(path: str) -> str: ref.extractall(extract_path) return extract_path except (zipfile.BadZipfile, tarfile.ReadError) as e: - raise ValueError(f"File could not be opened successfully: {e}") + raise ValueError(f"File could not be opened successfully:") from e # if the path is not a folder or a supported compressed file type, raise an exception raise ValueError(f"Please enter a valid path: {path}") diff --git a/scripts/dataset.py b/scripts/dataset.py index 1ff7fc0..336bcec 100644 --- a/scripts/dataset.py +++ b/scripts/dataset.py @@ -61,6 +61,7 @@ def generate_fake_dataset(num_files: int, min_words_per_file: int, output_file = os.path.join(output_dir, f'file_{i}.dat') with open(output_file, 'w') as f: f.write('\n'.join(words)) + print("Fake dataset has been generated.") if __name__ == "__main__":