Skip to content

Commit

Permalink
Merge pull request #1 from qalita-io/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
armandleopold authored Dec 4, 2023
2 parents 053ce71 + 9cbcedb commit fc76078
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 17 deletions.
28 changes: 15 additions & 13 deletions profiling_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,21 @@ def denormalize(data):
# Get the path from the config file
path = config["config"]["path"]

# Check if there are CSV files in the path
print("Check csv files")
csv_files = glob.glob(os.path.join(path, "*.csv"))

if csv_files:
print("CSV files found:")
first_csv_file = csv_files[0]
print(f"Loading first CSV file: {first_csv_file}")
df = pd.read_csv(
first_csv_file, low_memory=False, memory_map=True, on_bad_lines="skip"
)
else:
raise FileNotFoundError("No CSV files found in the provided path.")
# Check for CSV and XLSX files in the path.
# Each listing is sorted because glob returns matches in arbitrary,
# filesystem-dependent order; CSV files keep precedence over XLSX files.
print("Checking for data files")
data_files = sorted(glob.glob(os.path.join(path, "*.csv"))) + sorted(
    glob.glob(os.path.join(path, "*.xlsx"))
)

if not data_files:
    raise FileNotFoundError("No CSV or XLSX files found in the provided path.")

# Load the first data file (either CSV or XLSX)
first_data_file = data_files[0]
print(f"Loading first data file: {first_data_file}")

# Compare the extension case-insensitively: on Windows glob matches patterns
# without regard to case, so a name such as "DATA.CSV" can be selected, and a
# case-sensitive endswith() check would then leave `df` undefined.
file_extension = os.path.splitext(first_data_file)[1].lower()

if file_extension == ".csv":
    df = pd.read_csv(
        first_data_file, low_memory=False, memory_map=True, on_bad_lines="skip"
    )
elif file_extension == ".xlsx":
    df = pd.read_excel(first_data_file, engine="openpyxl")
else:
    # Fail here with a clear message rather than with a NameError on `df` later.
    raise ValueError(f"Unsupported data file type: {first_data_file}")

profile = ProfileReport(df, minimal=True, title="Profiling Report")
profile.to_file("report.html")
Expand Down
2 changes: 1 addition & 1 deletion profiling_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: profiling
type: completeness
url: https://github.com/qalita-io/packs/tree/main/profiling_pack
version: 1.0.8
version: 1.0.14
visibility: public
3 changes: 2 additions & 1 deletion profiling_pack/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ python = ">=3.10,<3.12"
ydata-profiling = "^4.6.0"
matplotlib = "3.7.0"
lxml = "^4.9.3"
pandas = "1.4.x"
pandas = "2.0.3"
openpyxl = "^3.1.2"

[build-system]
requires = ["poetry-core"]
Expand Down
6 changes: 4 additions & 2 deletions profiling_pack/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ else
exit 1
fi

# Exported so that poetry, which runs as a child process of this script,
# actually receives the setting; a plain assignment stays local to this shell.
export POETRY_INSTALLER_MAX_WORKERS=10

# Extract pack name from properties.yaml using Python
PACK_NAME=$($PYTHON_CMD get_pack_name.py)

# Install poetry if it's not installed
if ! command -v poetry &> /dev/null
if ! command -v poetry > /dev/null
then
echo "Poetry could not be found, installing now..."
export POETRY_HOME="$HOME/.poetry"
Expand All @@ -33,7 +35,7 @@ if ! $PYTHON_CMD -m venv --help > /dev/null 2>&1; then
fi

# Check if virtual environment specific to the pack exists in the parent directory
VENV_PATH="$HOME/${PACK_NAME}_venv"
VENV_PATH="$HOME/.qalita/agent_run_temp/${PACK_NAME}_venv"

if [ ! -d "$VENV_PATH" ]; then
$PYTHON_CMD -m venv "$VENV_PATH"
Expand Down

0 comments on commit fc76078

Please sign in to comment.