From f507240e4a08e6378f51274adfcabc94c56b9d5f Mon Sep 17 00:00:00 2001 From: oh2024 Date: Fri, 2 Aug 2024 09:49:34 +0000 Subject: [PATCH 1/2] Add validation scripts --- tools/validation/data.parquet | Bin 0 -> 1959 bytes tools/validation/offline.sql | 5 +++ tools/validation/online.sql | 2 + tools/validation/prepare_data.sql | 12 ++++++ tools/validation/test.py | 57 +++++++++++++++++++++++++++ tools/validation/test_consistency.sh | 2 + 6 files changed, 78 insertions(+) create mode 100644 tools/validation/data.parquet create mode 100644 tools/validation/offline.sql create mode 100644 tools/validation/online.sql create mode 100644 tools/validation/prepare_data.sql create mode 100644 tools/validation/test.py create mode 100644 tools/validation/test_consistency.sh diff --git a/tools/validation/data.parquet b/tools/validation/data.parquet new file mode 100644 index 0000000000000000000000000000000000000000..36fd1dfc5efb6aa76db9932c1d3bd732afa4e033 GIT binary patch literal 1959 zcmb7F--{bn6h3z{$&TyHHg$8yi4GWs-LS23NHWQ!SpsH7uu9M(wH8#`NoHmZ=EpXf z)RpZ%xmGCKCtuuBd=wEuk>Z0bSfvz^4fyCEpn@;O2cP=nxszmX=x((;>^JwvIp4Y8 zIrmKV)XM8QD$+Cb)f%0nBEYkH1OObz5u!6BCf4itYBWf!;JN}DJdeG7p90lsDnk=U zj!{XsVM*|PO>i7E&lQ+cf<&U#g{wsXbbgzjXU-5hYhnWJn)o=rRalx@;f8r*L}o@A zp&;fEM-XwuQN$C7c?1iC0^&4LqXJVe=_+oq2am#SE>M#u7CytX57nTJQ~ZE* z%bH)&#A`b{&ok+Kg|vxuW*qLy)!}Zf>id@wpCj%f{;XP7WQCC!F!)!5u1^(48t&p# zv#OeeLGVdenaKUSc-on<$ZI=sOBI`c56KKxTciR@lMMjrUx<#8 z7o!-)_(D+sA?f>3l()a5AJXU3G4IXjG`5$r9mf1kP_GN=dox&ZI3SfPv(uF>O;Y)m zq_50kD!j8;DtrY~$Ew$5HG#1;t=n5Qoizs=$qakSyy*4%&2C4FT4vVFf_7}5NB=_0 z#P+)|@B*{fecx<*f#W)Xqx}%sFoF-(JY(7D2faZfFfxYU^jdD;Sbl56=s0a=YUFU6 zyEQwjxb6&EEvMc>I}*K&F^Op`Z)|R6I5*F^%}(I0dc8xCElzHA4|QyTGyPWAVTnDe z;v%QI-2qGS(YZF~2F!orjTlea2?pX4Y4zT|Ze#9T=Q}{3X EU)v|1(EtDd literal 0 HcmV?d00001 diff --git a/tools/validation/offline.sql b/tools/validation/offline.sql new file mode 100644 index 00000000000..3a67e567a05 --- /dev/null +++ b/tools/validation/offline.sql @@ -0,0 +1,5 @@ +SET 
@@execute_mode='offline'; +SET @@sync_job = "true"; +USE demo_db; +SELECT c1, c2, sum(c3) OVER w1 AS w1_c3_sum FROM demo_table1 WINDOW w1 AS (PARTITION BY demo_table1.c1 ORDER BY demo_table1.c6 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) INTO outfile "file:///tmp/offline.csv" OPTIONS (header = false, mode = 'overwrite'); + diff --git a/tools/validation/online.sql b/tools/validation/online.sql new file mode 100644 index 00000000000..45253df11d6 --- /dev/null +++ b/tools/validation/online.sql @@ -0,0 +1,2 @@ +USE demo_db; +SELECT c1, c2, sum(c3) OVER w1 AS w1_c3_sum FROM demo_table1 WINDOW w1 AS (PARTITION BY demo_table1.c1 ORDER BY demo_table1.c6 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) CONFIG (execute_mode = 'request', values = ("aaa", 11, 22, 1.2, 1.3, 1635247427000, "2021-05-20")); diff --git a/tools/validation/prepare_data.sql b/tools/validation/prepare_data.sql new file mode 100644 index 00000000000..60f93f696ed --- /dev/null +++ b/tools/validation/prepare_data.sql @@ -0,0 +1,12 @@ +CREATE DATABASE demo_db; +USE demo_db; +CREATE TABLE demo_table1(c1 string, c2 int, c3 bigint, c4 float, c5 double, c6 timestamp, c7 date); +SET @@execute_mode='offline'; +SET @@sync_job = "true"; +LOAD DATA INFILE 'file:///tmp/data.parquet' INTO TABLE demo_table1 options(format='parquet', mode='append'); +SET @@execute_mode='offline'; +SELECT c1, c2, sum(c3) OVER w1 AS w1_c3_sum FROM demo_table1 WINDOW w1 AS (PARTITION BY demo_table1.c1 ORDER BY demo_table1.c6 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) INTO OUTFILE '/tmp/feature_data' OPTIONS(mode='overwrite'); +SET @@execute_mode='online'; +DEPLOY demo_data_service SELECT c1, c2, sum(c3) OVER w1 AS w1_c3_sum FROM demo_table1 WINDOW w1 AS (PARTITION BY demo_table1.c1 ORDER BY demo_table1.c6 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW); +SET @@execute_mode='online'; +LOAD DATA INFILE 'file:///tmp/data.parquet' INTO TABLE demo_table1 options(format='parquet', header=true, mode='append'); diff --git a/tools/validation/test.py 
b/tools/validation/test.py new file mode 100644 index 00000000000..b6723117649 --- /dev/null +++ b/tools/validation/test.py @@ -0,0 +1,57 @@ +import subprocess +import os +import csv + +# Execute the offline SQL command +subprocess.run("/work/openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < offline.sql", + shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + +# Define the directory containing the CSV files +csv_dir = "/tmp/offline.csv/" + +# List all files in the directory +files = os.listdir(csv_dir) + +# Filter out non-CSV files (and avoid reading .crc files) +csv_files = [file for file in files if file.endswith('.csv')] + +# Initialize an empty list to store the combined data +combined_data = [] + +# Read and concatenate all CSV files +for file in csv_files: + with open(os.path.join(csv_dir, file), newline='') as csvfile: + reader = csv.reader(csvfile) + for row in reader: + combined_data.append(row) + +# Define the command to be executed +command = "/work/openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < online.sql" + +# Execute the command +try: + result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + output = result.stdout + + # Extract the relevant line containing the data + lines = output.splitlines() + for i, line in enumerate(lines): + if "c1" in line and "c2" in line and "w1_c3_sum" in line: + data_line = lines[i + 2] # The line containing the data is two lines below the header + break + + # Split the line into an array + data_array = data_line.split() + + # Check if the specific row exists + row_exists = any(row == data_array for row in combined_data) + + if row_exists: + print("Online and offline data consistent") + else: + print("Online and offline data not consistent") + + # Print the resulting array +except subprocess.CalledProcessError as e: + print("An error 
occurred while executing the command:", e) + print("Error Output:\n", e.stderr) diff --git a/tools/validation/test_consistency.sh b/tools/validation/test_consistency.sh new file mode 100644 index 00000000000..15775fe7fd1 --- /dev/null +++ b/tools/validation/test_consistency.sh @@ -0,0 +1,2 @@ +/work/openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < /tmp/prepare_data.sql +python3 test.py From 66384916de07a7d7f384d264342ba4c7ff7eecf3 Mon Sep 17 00:00:00 2001 From: oh2024 Date: Tue, 6 Aug 2024 08:29:05 +0000 Subject: [PATCH 2/2] Add README.md --- tools/validation/README.md | 3 +++ tools/validation/test.py | 6 ++++-- tools/validation/test_consistency.sh | 5 +++-- 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 tools/validation/README.md diff --git a/tools/validation/README.md b/tools/validation/README.md new file mode 100644 index 00000000000..9ae5493d649 --- /dev/null +++ b/tools/validation/README.md @@ -0,0 +1,3 @@ +The idea of this solution is to run the same query in both online and offline modes in OpenMLDB and compare their results to ensure they produce the same outcome. + +Run the test_consistency.sh script in /tmp/ and ensure that the paths for OPENMLDB_BIN_PATH, zk_cluster, and zk_root_path are set correctly. 
\ No newline at end of file diff --git a/tools/validation/test.py b/tools/validation/test.py index b6723117649..f48b7ce7e94 100644 --- a/tools/validation/test.py +++ b/tools/validation/test.py @@ -1,9 +1,11 @@ import subprocess import os import csv +import sys +openmldb_binary_path = sys.argv[1] # Execute the offline SQL command -subprocess.run("/work/openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < offline.sql", +subprocess.run(f"{openmldb_binary_path} --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < offline.sql", shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) # Define the directory containing the CSV files @@ -26,7 +28,7 @@ combined_data.append(row) # Define the command to be executed -command = "/work/openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < online.sql" +command = f"{openmldb_binary_path} --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < online.sql" # Execute the command try: diff --git a/tools/validation/test_consistency.sh b/tools/validation/test_consistency.sh index 15775fe7fd1..632acfaba50 100644 --- a/tools/validation/test_consistency.sh +++ b/tools/validation/test_consistency.sh @@ -1,2 +1,3 @@ -/work/openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < /tmp/prepare_data.sql -python3 test.py +OPENMLDB_BIN_PATH=/work/openmldb/bin/openmldb +$OPENMLDB_BIN_PATH --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client < /tmp/prepare_data.sql +python3 test.py $OPENMLDB_BIN_PATH \ No newline at end of file