Merge pull request #1 from qalita-io/dev

Dev
qalita-io · Dec 4, 2023 · fc76078
2 parents 053ce71 + 9cbcedb
commit fc76078
Show file tree

Hide file tree

Showing 4 changed files with 22 additions and 17 deletions.
diff --git a/profiling_pack/main.py b/profiling_pack/main.py
@@ -59,19 +59,21 @@ def denormalize(data):
 # Get the path from the config file
 path = config["config"]["path"]
 
-# Check if there are CSV files in the path
-print("Check csv files")
-csv_files = glob.glob(os.path.join(path, "*.csv"))
-
-if csv_files:
-    print("CSV files found:")
-    first_csv_file = csv_files[0]
-    print(f"Loading first CSV file: {first_csv_file}")
-    df = pd.read_csv(
-        first_csv_file, low_memory=False, memory_map=True, on_bad_lines="skip"
-    )
-else:
-    raise FileNotFoundError("No CSV files found in the provided path.")
+# Check for CSV and XLSX files in the path
+print("Checking for data files")
+data_files = glob.glob(os.path.join(path, "*.csv")) + glob.glob(os.path.join(path, "*.xlsx"))
+
+if not data_files:
+    raise FileNotFoundError("No CSV or XLSX files found in the provided path.")
+
+# Load the first data file (either CSV or XLSX)
+first_data_file = data_files[0]
+print(f"Loading first data file: {first_data_file}")
+
+if first_data_file.endswith('.csv'):
+    df = pd.read_csv(first_data_file, low_memory=False, memory_map=True, on_bad_lines="skip")
+elif first_data_file.endswith('.xlsx'):
+    df = pd.read_excel(first_data_file, engine='openpyxl')
 
 profile = ProfileReport(df, minimal=True, title="Profiling Report")
 profile.to_file("report.html")

diff --git a/profiling_pack/properties.yaml b/profiling_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: profiling
 type: completeness
 url: https://github.com/qalita-io/packs/tree/main/profiling_pack
-version: 1.0.8
+version: 1.0.14
 visibility: public
diff --git a/profiling_pack/pyproject.toml b/profiling_pack/pyproject.toml
@@ -11,7 +11,8 @@ python = ">=3.10,<3.12"
 ydata-profiling = "^4.6.0"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "1.4.x"
+pandas = "2.0.3"
+openpyxl = "^3.1.2"
 
 [build-system]
 requires = ["poetry-core"]

diff --git a/profiling_pack/run.sh b/profiling_pack/run.sh
@@ -13,11 +13,13 @@ else
     exit 1
 fi
 
+POETRY_INSTALLER_MAX_WORKERS=10
+
 # Extract pack name from properties.yaml using Python
 PACK_NAME=$($PYTHON_CMD get_pack_name.py)
 
 # Install poetry if it's not installed
-if ! command -v poetry &> /dev/null
+if ! command -v poetry > /dev/null
 then
     echo "Poetry could not be found, installing now..."
     export POETRY_HOME="$HOME/.poetry"
@@ -33,7 +35,7 @@ if ! $PYTHON_CMD -m venv --help > /dev/null 2>&1; then
 fi
 
 # Check if virtual environment specific to the pack exists in the parent directory
-VENV_PATH="$HOME/${PACK_NAME}_venv"
+VENV_PATH="$HOME/.qalita/agent_run_temp/${PACK_NAME}_venv"
 
 if [ ! -d "$VENV_PATH" ]; then
     $PYTHON_CMD -m venv "$VENV_PATH"