opendataqna.py (forked from GoogleCloudPlatform/Open_Data_QnA)
import asyncio
import argparse
import uuid

from agents import EmbedderAgent, BuildSQLAgent, DebugSQLAgent, ValidateSQLAgent, ResponseAgent, VisualizeAgent
from utilities import (PROJECT_ID, PG_REGION, BQ_REGION, EXAMPLES, LOGGING, VECTOR_STORE,
                       BQ_OPENDATAQNA_DATASET_NAME, USE_SESSION_HISTORY)
from dbconnectors import bqconnector, pgconnector, firestoreconnector
from embeddings.store_embeddings import add_sql_embedding

# Based on VECTOR_STORE in config.ini, initialize the vector connector and region
if VECTOR_STORE == 'bigquery-vector':
    region = BQ_REGION
    vector_connector = bqconnector
    call_await = False

elif VECTOR_STORE == 'cloudsql-pgvector':
    region = PG_REGION
    vector_connector = pgconnector
    call_await = True

else:
    raise ValueError("Please specify a valid Vector Store. Supported options are 'bigquery-vector' and 'cloudsql-pgvector'")
def generate_uuid():
    """Generates a random UUID (Universally Unique Identifier) Version 4.

    Returns:
        str: A string representation of the UUID in the format
             xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.
    """
    return str(uuid.uuid4())
############################
#_____GET ALL DATABASES_____#
############################
def get_all_databases():
    """Retrieves a list of all distinct databases (with source type) from the vector store.

    This function queries the vector store (BigQuery or PostgreSQL) to fetch a list of
    unique databases, including their source type. The source type indicates whether
    the database is a BigQuery dataset or a PostgreSQL schema.

    Returns:
        tuple: A tuple containing two elements:
            - result (str or list): A JSON-formatted string containing the list of databases and their source types,
              or an error message if an exception occurs.
            - invalid_response (bool): A flag indicating whether an error occurred during retrieval (True)
              or if the response is valid (False).

    Raises:
        Exception: If there is an issue connecting to or querying the vector store.
                   The exception message will be included in the returned `result`.
    """
    try:
        if VECTOR_STORE == 'bigquery-vector':
            final_sql = f'''SELECT DISTINCT user_grouping AS table_schema
                            FROM `{PROJECT_ID}.{BQ_OPENDATAQNA_DATASET_NAME}.table_details_embeddings`'''
        else:
            final_sql = """SELECT DISTINCT user_grouping AS table_schema
                           FROM table_details_embeddings"""

        result = vector_connector.retrieve_df(final_sql)
        result = result.to_json(orient='records')
        invalid_response = False

    except Exception as e:
        result = "Issue was encountered while extracting databases in vector store:: " + str(e)
        invalid_response = True

    return result, invalid_response
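
# Usage sketch for get_all_databases (a hypothetical caller; assumes config.ini
# points at a populated vector store, and the grouping name shown is illustrative):
#
#   databases_json, had_error = get_all_databases()
#   if not had_error:
#       print(databases_json)  # e.g. '[{"table_schema":"MovieExplorer-bigquery"}]'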
############################
#_____GET SOURCE TYPE_____##
############################
def get_source_type(user_grouping):
    """Retrieves the source type of a specified database from the vector store.

    This function queries the vector store (BigQuery or PostgreSQL) to determine whether the
    given database is a BigQuery dataset ('bigquery') or a PostgreSQL schema ('postgres').

    Args:
        user_grouping (str): The name of the database to look up.

    Returns:
        tuple: A tuple containing two elements:
            - result (str): The source type of the database ('bigquery' or 'postgres'), or an error message if not found or an exception occurs.
            - invalid_response (bool): A flag indicating whether an error occurred during retrieval (True) or if the response is valid (False).

    Raises:
        Exception: If there is an issue connecting to or querying the vector store. The exception message will be included in the returned `result`.
    """
    try:
        if VECTOR_STORE == 'bigquery-vector':
            sql = f'''SELECT DISTINCT source_type
                      FROM `{PROJECT_ID}.{BQ_OPENDATAQNA_DATASET_NAME}.table_details_embeddings`
                      WHERE user_grouping='{user_grouping}' '''
        else:
            sql = f'''SELECT DISTINCT source_type
                      FROM table_details_embeddings
                      WHERE user_grouping='{user_grouping}' '''

        result = vector_connector.retrieve_df(sql)
        result = (str(result.iloc[0, 0])).lower()
        invalid_response = False

    except Exception as e:
        result = "Error at finding the datasource :: " + str(e)
        invalid_response = True

    return result, invalid_response
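
# Usage sketch for get_source_type (the grouping name is illustrative):
#
#   source, had_error = get_source_type("MovieExplorer-bigquery")
#   if not had_error:
#       print(source)  # 'bigquery' or 'postgres'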
############################
###_____GENERATE SQL_____###
############################
async def generate_sql(session_id,
                       user_question,
                       user_grouping,
                       RUN_DEBUGGER,
                       DEBUGGING_ROUNDS,
                       LLM_VALIDATION,
                       Embedder_model,
                       SQLBuilder_model,
                       SQLChecker_model,
                       SQLDebugger_model,
                       num_table_matches,
                       num_column_matches,
                       table_similarity_threshold,
                       column_similarity_threshold,
                       example_similarity_threshold,
                       num_sql_matches,
                       user_id="[email protected]"):
    """Generates an SQL query based on a user's question and database.

    This asynchronous function orchestrates a pipeline to generate an SQL query from a natural language question.
    It leverages various agents for embedding, SQL building, validation, and debugging.

    Args:
        session_id (str): Session ID to identify the chat conversation.
        user_question (str): The user's natural language question.
        user_grouping (str): The name of the database to query.
        RUN_DEBUGGER (bool): Whether to run the SQL debugger.
        DEBUGGING_ROUNDS (int): The number of debugging rounds to perform.
        LLM_VALIDATION (bool): Whether to use LLM for validation.
        Embedder_model (str): The name of the embedding model.
        SQLBuilder_model (str): The name of the SQL builder model.
        SQLChecker_model (str): The name of the SQL checker model.
        SQLDebugger_model (str): The name of the SQL debugger model.
        num_table_matches (int): The number of table matches to retrieve.
        num_column_matches (int): The number of column matches to retrieve.
        table_similarity_threshold (float): The similarity threshold for table matching.
        column_similarity_threshold (float): The similarity threshold for column matching.
        example_similarity_threshold (float): The similarity threshold for example matching.
        num_sql_matches (int): The number of similar SQL queries to retrieve.
        user_id (str, optional): Identifier of the user, logged with the session history.

    Returns:
        tuple: A tuple containing:
            - final_sql (str): The final generated SQL query, or an error message if generation failed.
            - session_id (str): The session ID (newly generated if none was supplied).
            - invalid_response (bool): True if the response is invalid (e.g., due to an error), False otherwise.
    """
    # Initialize state before the try block so the exception handler below can
    # always log it (previously these lived inside the try, so an early failure
    # raised NameError in the handler)
    found_in_vector = 'N'            # whether an exact query match was found
    final_sql = 'Not Generated Yet'  # final generated SQL
    process_step = 'Not Started'
    error_msg = ''
    corrected_sql = ''
    DATA_SOURCE = 'Yet to determine'
    AUDIT_TEXT = ''

    try:
        if session_id is None or session_id == "":
            print("This is a new session")
            session_id = generate_uuid()

        ## LOAD AGENTS
        print("Loading Agents.")
        embedder = EmbedderAgent(Embedder_model)
        SQLBuilder = BuildSQLAgent(SQLBuilder_model)
        SQLChecker = ValidateSQLAgent(SQLChecker_model)
        SQLDebugger = DebugSQLAgent(SQLDebugger_model)

        re_written_qe = user_question

        print("Getting the history for the session.......\n")
        session_history = firestoreconnector.get_chat_logs_for_session(session_id) if USE_SESSION_HISTORY else None
        print("Grabbed history for the session:: " + str(session_history))

        if session_history is None or not session_history:
            print("No records for the session. Not rewriting the question\n")
        else:
            concated_questions, re_written_qe = SQLBuilder.rewrite_question(user_question, session_history)

        DATA_SOURCE, src_invalid = get_source_type(user_grouping)
        if src_invalid:
            raise ValueError(DATA_SOURCE)

        # vertexai.init(project=PROJECT_ID, location=region)
        # aiplatform.init(project=PROJECT_ID, location=region)
        print("Source selected as : " + str(DATA_SOURCE) + "\nSchema or Dataset Name is : " + str(user_grouping))
        print("Vector Store selected as : " + str(VECTOR_STORE))

        # Reset AUDIT_TEXT
        AUDIT_TEXT = 'Creating embedding for given question'
        # Fetch the embedding of the user's input question
        embedded_question = embedder.create(re_written_qe)
        AUDIT_TEXT = AUDIT_TEXT + "\nUser Question : " + str(user_question) + "\nUser Database : " + str(user_grouping)
        process_step = "\n\nGet Exact Match: "

        # Look for exact matches in known questions IF kgq is enabled
        if EXAMPLES:
            exact_sql_history = vector_connector.getExactMatches(user_question)
        else:
            exact_sql_history = None

        # If an exact match of the user query is found, retrieve the SQL and skip the generation pipeline
        if exact_sql_history is not None:
            found_in_vector = 'Y'
            final_sql = exact_sql_history
            invalid_response = False
            AUDIT_TEXT = AUDIT_TEXT + "\nExact match has been found! Going to retrieve the SQL query from cache and serve!"

        else:
            # No exact match found. Proceed to look for similar entries in the db IF kgq is enabled
            if EXAMPLES:
                AUDIT_TEXT = AUDIT_TEXT + process_step + "\nNo exact match found in query cache, retrieving relevant schema and known good queries for few shot examples using similarity search...."
                process_step = "\n\nGet Similar Match: "
                if call_await:
                    similar_sql = await vector_connector.getSimilarMatches('example', user_grouping, embedded_question, num_sql_matches, example_similarity_threshold)
                else:
                    similar_sql = vector_connector.getSimilarMatches('example', user_grouping, embedded_question, num_sql_matches, example_similarity_threshold)
            else:
                similar_sql = "No similar SQLs provided..."

            process_step = "\n\nGet Table and Column Schema: "
            # Retrieve matching tables and columns
            if call_await:
                table_matches = await vector_connector.getSimilarMatches('table', user_grouping, embedded_question, num_table_matches, table_similarity_threshold)
                column_matches = await vector_connector.getSimilarMatches('column', user_grouping, embedded_question, num_column_matches, column_similarity_threshold)
            else:
                table_matches = vector_connector.getSimilarMatches('table', user_grouping, embedded_question, num_table_matches, table_similarity_threshold)
                column_matches = vector_connector.getSimilarMatches('column', user_grouping, embedded_question, num_column_matches, column_similarity_threshold)

            AUDIT_TEXT = AUDIT_TEXT + process_step + "\nRetrieved Similar Known Good Queries, Table Schema and Column Schema: \n" + '\nRetrieved Tables: \n' + str(table_matches) + '\n\nRetrieved Columns: \n' + str(column_matches) + '\n\nRetrieved Known Good Queries: \n' + str(similar_sql)

            # If similar table and column schemas were found:
            if len(table_matches.replace('Schema(values):', '').replace(' ', '')) > 0 or len(column_matches.replace('Column name(type):', '').replace(' ', '')) > 0:

                # GENERATE SQL
                process_step = "\n\nBuild SQL: "
                generated_sql = SQLBuilder.build_sql(DATA_SOURCE, user_grouping, user_question, session_history, table_matches, column_matches, similar_sql)
                final_sql = generated_sql
                AUDIT_TEXT = AUDIT_TEXT + process_step + "\nGenerated SQL : " + str(generated_sql)

                if 'unrelated_answer' in generated_sql:
                    invalid_response = True
                    final_sql = "This is an unrelated question or you are not asking a valid query"

                # If the agent's assessment is valid, proceed with checks
                else:
                    invalid_response = False

                    if RUN_DEBUGGER:
                        generated_sql, invalid_response, AUDIT_TEXT = SQLDebugger.start_debugger(DATA_SOURCE, user_grouping, generated_sql, user_question, SQLChecker, table_matches, column_matches, AUDIT_TEXT, similar_sql, DEBUGGING_ROUNDS, LLM_VALIDATION)
                        # AUDIT_TEXT = AUDIT_TEXT + '\n Feedback from Debugger: \n' + feedback_text

                    final_sql = generated_sql
                    AUDIT_TEXT = AUDIT_TEXT + "\nFinal SQL after Debugger: \n" + str(final_sql)

            # No matching table found
            else:
                invalid_response = True
                print('No tables found in Vector ...')
                AUDIT_TEXT = AUDIT_TEXT + "\nNo tables have been found in the Vector DB. The question cannot be answered with the provided data source!"

        # print(f'\n\n AUDIT_TEXT: \n {AUDIT_TEXT}')

        if LOGGING:
            bqconnector.make_audit_entry(DATA_SOURCE, user_grouping, SQLBuilder_model, user_question, final_sql, found_in_vector, "", process_step, error_msg, AUDIT_TEXT)

    except Exception as e:
        error_msg = str(e)
        final_sql = "Error generating the SQL. Please check the logs. " + str(e)
        invalid_response = True
        AUDIT_TEXT = AUDIT_TEXT + "\nException at SQL generation"
        print("Error :: " + str(error_msg))
        if LOGGING:
            bqconnector.make_audit_entry(DATA_SOURCE, user_grouping, SQLBuilder_model, user_question, final_sql, found_in_vector, "", process_step, error_msg, AUDIT_TEXT)

    if USE_SESSION_HISTORY and not invalid_response:
        firestoreconnector.log_chat(session_id, user_question, final_sql, user_id)
        print("Session history persisted")

    return final_sql, session_id, invalid_response
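
# Usage sketch for generate_sql (values mirror the defaults used by run_pipeline
# below; the question and grouping are illustrative):
#
#   final_sql, session_id, invalid = asyncio.run(generate_sql(
#       session_id=None,
#       user_question="How many movies have review ratings above 5?",
#       user_grouping="MovieExplorer-bigquery",
#       RUN_DEBUGGER=True, DEBUGGING_ROUNDS=2, LLM_VALIDATION=False,
#       Embedder_model='vertex', SQLBuilder_model='gemini-1.5-pro',
#       SQLChecker_model='gemini-1.0-pro', SQLDebugger_model='gemini-1.0-pro',
#       num_table_matches=5, num_column_matches=10,
#       table_similarity_threshold=0.3, column_similarity_threshold=0.3,
#       example_similarity_threshold=0.3, num_sql_matches=3))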
############################
###_____GET RESULTS_____####
############################
def get_results(user_grouping, final_sql, invalid_response=False, EXECUTE_FINAL_SQL=True):
    """Executes the final SQL query (if valid) and retrieves the results.

    This function first determines the data source (BigQuery or PostgreSQL) based on the provided database name.
    If the SQL query is valid and execution is enabled, it fetches the results using the appropriate connector.

    Args:
        user_grouping (str): The name of the database to query.
        final_sql (str): The final SQL query to execute.
        invalid_response (bool, optional): A flag indicating whether the SQL query is invalid. Defaults to False.
        EXECUTE_FINAL_SQL (bool, optional): Whether to execute the final SQL query. Defaults to True.

    Returns:
        tuple: A tuple containing:
            - result_df (pandas.DataFrame or str): The results of the SQL query as a DataFrame, or an error message if the query is invalid or execution failed.
            - invalid_response (bool): True if the response is invalid (e.g., due to an error), False otherwise.

    Raises:
        ValueError: If the data source is invalid or not supported.
        Exception: If there's an error executing the SQL query or retrieving the results.
    """
    try:
        DATA_SOURCE, src_invalid = get_source_type(user_grouping)

        if not src_invalid:
            ## SET DATA SOURCE
            if DATA_SOURCE == 'bigquery':
                src_connector = bqconnector
            else:
                src_connector = pgconnector
        else:
            raise ValueError(DATA_SOURCE)

        if not invalid_response:
            try:
                if EXECUTE_FINAL_SQL is True:
                    # Strip markdown fences and any EXPLAIN ANALYZE prefix before execution
                    final_exec_result_df = src_connector.retrieve_df(final_sql.replace("```sql", "").replace("```", "").replace("EXPLAIN ANALYZE ", ""))
                    result_df = final_exec_result_df

                else:  # Do not execute final SQL
                    print("Not executing final SQL since EXECUTE_FINAL_SQL variable is False\n ")
                    result_df = "Please enable the Execution of the final SQL so I can provide an answer"
                    invalid_response = True

            except ValueError as e:  # 'as e' was missing here, leaving 'e' unbound below
                result_df = "Error has been encountered :: " + str(e)
                invalid_response = True

        else:  # Do not execute final SQL
            result_df = "Not executing final SQL as it is invalid, please debug!"

    except Exception as e:
        print(f"An error occurred. Aborting... Error Message: {e}")
        result_df = "Error has been encountered :: " + str(e)
        invalid_response = True

    return result_df, invalid_response
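
# Usage sketch for get_results (assumes final_sql came from generate_sql and is
# valid; the grouping name is illustrative):
#
#   results_df, had_error = get_results("MovieExplorer-bigquery", final_sql,
#                                       invalid_response=False,
#                                       EXECUTE_FINAL_SQL=True)
#   # results_df is a pandas DataFrame on success, an error string otherwise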
def get_response(session_id, user_question, result_df, Responder_model='gemini-1.0-pro'):
    """Generates a natural language response from the SQL results, rewriting the question with session history when available."""
    try:
        Responder = ResponseAgent(Responder_model)

        if session_id is None or session_id == "":
            print("This is a new session")
        else:
            session_history = firestoreconnector.get_chat_logs_for_session(session_id) if USE_SESSION_HISTORY else None
            if session_history is None or not session_history:
                print("No records for the session. Not rewriting the question\n")
            else:
                concated_questions, re_written_qe = Responder.rewrite_question(user_question, session_history)
                user_question = re_written_qe

        _resp = Responder.run(user_question, result_df)
        invalid_response = False

    except Exception as e:
        print(f"An error occurred. Aborting... Error Message: {e}")
        _resp = "Error has been encountered :: " + str(e)
        invalid_response = True

    return _resp, invalid_response
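
# Usage sketch for get_response (the results are passed as a records-oriented
# JSON string, mirroring the call made in run_pipeline below):
#
#   answer, had_error = get_response(session_id, user_question,
#                                    results_df.to_json(orient='records'))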
############################
###_____RUN PIPELINE_____###
############################
async def run_pipeline(session_id,
                       user_question,
                       user_grouping,
                       RUN_DEBUGGER=True,
                       EXECUTE_FINAL_SQL=True,
                       DEBUGGING_ROUNDS=2,
                       LLM_VALIDATION=False,
                       Embedder_model='vertex',
                       SQLBuilder_model='gemini-1.5-pro',
                       SQLChecker_model='gemini-1.0-pro',
                       SQLDebugger_model='gemini-1.0-pro',
                       Responder_model='gemini-1.0-pro',
                       num_table_matches=5,
                       num_column_matches=10,
                       table_similarity_threshold=0.3,
                       column_similarity_threshold=0.3,
                       example_similarity_threshold=0.3,
                       num_sql_matches=3):
    """Orchestrates the end-to-end SQL generation and response pipeline.

    This asynchronous function manages the entire process of generating an SQL query from a user's question,
    executing the query (if valid), and formulating a natural language response based on the results.

    Args:
        session_id (str): Session ID to identify the chat conversation.
        user_question (str): The user's natural language question.
        user_grouping (str): The name of the user grouping to query.
        RUN_DEBUGGER (bool, optional): Whether to run the SQL debugger. Defaults to True.
        EXECUTE_FINAL_SQL (bool, optional): Whether to execute the final SQL query. Defaults to True.
        DEBUGGING_ROUNDS (int, optional): The number of debugging rounds to perform. Defaults to 2.
        LLM_VALIDATION (bool, optional): Whether to use LLM for validation. Defaults to False.
        Embedder_model (str, optional): The name of the embedding model. Defaults to 'vertex'.
        SQLBuilder_model (str, optional): The name of the SQL builder model. Defaults to 'gemini-1.5-pro'.
        SQLChecker_model (str, optional): The name of the SQL checker model. Defaults to 'gemini-1.0-pro'.
        SQLDebugger_model (str, optional): The name of the SQL debugger model. Defaults to 'gemini-1.0-pro'.
        Responder_model (str, optional): The name of the responder model. Defaults to 'gemini-1.0-pro'.
        num_table_matches (int, optional): The number of table matches to retrieve. Defaults to 5.
        num_column_matches (int, optional): The number of column matches to retrieve. Defaults to 10.
        table_similarity_threshold (float, optional): The similarity threshold for table matching. Defaults to 0.3.
        column_similarity_threshold (float, optional): The similarity threshold for column matching. Defaults to 0.3.
        example_similarity_threshold (float, optional): The similarity threshold for example matching. Defaults to 0.3.
        num_sql_matches (int, optional): The number of similar SQL queries to retrieve. Defaults to 3.

    Returns:
        tuple: A tuple containing:
            - final_sql (str): The final generated SQL query, or an error message if generation failed.
            - results_df (pandas.DataFrame or str): The results of the SQL query as a DataFrame, or an error message if the query is invalid or execution failed.
            - _resp (str): The generated natural language response based on the results, or an error message if response generation failed.
    """
    final_sql, session_id, invalid_response = await generate_sql(session_id,
                                                                 user_question,
                                                                 user_grouping,
                                                                 RUN_DEBUGGER,
                                                                 DEBUGGING_ROUNDS,
                                                                 LLM_VALIDATION,
                                                                 Embedder_model,
                                                                 SQLBuilder_model,
                                                                 SQLChecker_model,
                                                                 SQLDebugger_model,
                                                                 num_table_matches,
                                                                 num_column_matches,
                                                                 table_similarity_threshold,
                                                                 column_similarity_threshold,
                                                                 example_similarity_threshold,
                                                                 num_sql_matches)

    if not invalid_response:
        results_df, invalid_response = get_results(user_grouping,
                                                   final_sql,
                                                   invalid_response=invalid_response,
                                                   EXECUTE_FINAL_SQL=EXECUTE_FINAL_SQL)
        if not invalid_response:
            _resp, invalid_response = get_response(session_id, user_question, results_df.to_json(orient='records'), Responder_model=Responder_model)
        else:
            _resp = results_df
    else:
        results_df = final_sql
        _resp = final_sql

    return final_sql, results_df, _resp
############################
#####_____GET KGQ_____######
############################
def get_kgq(user_grouping):
    """Retrieves known good SQL queries (KGQs) for a specific database from the vector store.

    This function queries the vector store (BigQuery or PostgreSQL) to fetch a limited number of
    distinct user questions and their corresponding generated SQL queries that are relevant to the
    specified database. These KGQs can be used as examples or references for generating new SQL queries.

    Args:
        user_grouping (str): The name of the user grouping for which to retrieve KGQs.

    Returns:
        tuple: A tuple containing two elements:
            - result (str): A JSON-formatted string containing the list of KGQs (user questions and SQL queries),
              or an error message if an exception occurs.
            - invalid_response (bool): A flag indicating whether an error occurred during retrieval (True)
              or if the response is valid (False).

    Raises:
        Exception: If there is an issue connecting to or querying the vector store.
                   The exception message will be included in the returned `result`.
    """
    try:
        if VECTOR_STORE == 'bigquery-vector':
            sql = f'''SELECT DISTINCT
                          example_user_question,
                          example_generated_sql
                      FROM `{PROJECT_ID}.{BQ_OPENDATAQNA_DATASET_NAME}.example_prompt_sql_embeddings`
                      WHERE user_grouping='{user_grouping}' LIMIT 5'''
        else:
            sql = f'''SELECT DISTINCT
                          example_user_question,
                          example_generated_sql
                      FROM example_prompt_sql_embeddings
                      WHERE user_grouping='{user_grouping}' LIMIT 5'''

        result = vector_connector.retrieve_df(sql)
        result = result.to_json(orient='records')
        invalid_response = False

    except Exception as e:
        result = "Issue was encountered while extracting known good sqls in vector store:: " + str(e)
        invalid_response = True

    return result, invalid_response
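
# Usage sketch for get_kgq (the grouping name is illustrative):
#
#   kgq_json, had_error = get_kgq("MovieExplorer-bigquery")
#   # kgq_json holds up to 5 example question/SQL pairs as JSON records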
############################
####_____EMBED SQL_____#####
############################
async def embed_sql(session_id, user_grouping, user_question, generate_sql):
    """Embeds a generated SQL query into the vector store as an example.

    This asynchronous function takes a user's question, a generated SQL query, and a database name as input.
    It calls the `add_sql_embedding` function to create an embedding of the SQL query and store it in the vector store,
    potentially for future reference as a known good query (KGQ).

    Args:
        session_id (str): Session ID, used to rewrite the question with chat history when available.
        user_grouping (str): The name of the grouping associated with the query.
        user_question (str): The user's original question.
        generate_sql (str): The SQL query generated from the user's question.

    Returns:
        tuple: A tuple containing two elements:
            - embedded (str or None): The embedded SQL query if successful, or an error message if an exception occurs.
            - invalid_response (bool): A flag indicating whether an error occurred during embedding (True)
              or if the response is valid (False).

    Raises:
        Exception: If there is an issue with the embedding process.
                   The exception message will be included in the returned `embedded` value.
    """
    try:
        Rewriter = ResponseAgent('gemini-1.5-pro')

        if session_id is None or session_id == "":
            print("This is a new session")
        else:
            session_history = firestoreconnector.get_chat_logs_for_session(session_id) if USE_SESSION_HISTORY else None
            if session_history is None or not session_history:
                print("No records for the session. Not rewriting the question\n")
            else:
                concated_questions, re_written_qe = Rewriter.rewrite_question(user_question, session_history)
                user_question = re_written_qe

        embedded = await add_sql_embedding(user_question, generate_sql, user_grouping)
        invalid_response = False

    except Exception as e:
        embedded = "Issue was encountered while embedding the SQL as example." + str(e)
        invalid_response = True

    return embedded, invalid_response
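
# Usage sketch for embed_sql (typically called to promote a validated query to a
# known good query; the grouping name is illustrative):
#
#   embedded, had_error = asyncio.run(embed_sql(session_id,
#                                               "MovieExplorer-bigquery",
#                                               user_question, final_sql))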
def visualize(session_id, user_question, generated_sql, sql_results):
    """Generates chart code for the SQL results, rewriting the question with session history when available."""
    try:
        Rewriter = ResponseAgent('gemini-1.5-pro')

        if session_id is None or session_id == "":
            print("This is a new session")
        else:
            session_history = firestoreconnector.get_chat_logs_for_session(session_id) if USE_SESSION_HISTORY else None
            if session_history is None or not session_history:
                print("No records for the session. Not rewriting the question\n")
            else:
                concated_questions, re_written_qe = Rewriter.rewrite_question(user_question, session_history)
                user_question = re_written_qe

        _viz = VisualizeAgent()
        # Pass the 'generated_sql' parameter; the original code mistakenly passed
        # the unrelated imported function 'generate_sql' here
        js_chart = _viz.generate_charts(user_question, generated_sql, sql_results)
        invalid_response = False

    except Exception as e:
        js_chart = "Issue was encountered while Generating Charts ::" + str(e)
        invalid_response = True

    return js_chart, invalid_response
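
# Usage sketch for visualize (sql_results is assumed to be a records-oriented
# JSON string, matching how run_pipeline serializes results):
#
#   js_chart, had_error = visualize(session_id, user_question, final_sql,
#                                   results_df.to_json(orient='records'))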
############################
#######_____MAIN_____#######
############################
if __name__ == '__main__':
    # user_question = "How many movies have review ratings above 5?"
    # user_grouping = 'MovieExplorer-bigquery'

    parser = argparse.ArgumentParser(description="Open Data QnA SQL Generation")
    parser.add_argument("--session_id", type=str, required=True, help="Session Id")
    parser.add_argument("--user_question", type=str, required=True, help="The user's question.")
    parser.add_argument("--user_grouping", type=str, required=True, help="The user grouping specified in the source list CSV file")

    # Optional arguments for run_pipeline parameters
    parser.add_argument("--run_debugger", action="store_true", help="Enable the debugger (default: False)")
    parser.add_argument("--execute_final_sql", action="store_true", help="Execute the final SQL (default: False)")
    parser.add_argument("--debugging_rounds", type=int, default=2, help="Number of debugging rounds (default: 2)")
    parser.add_argument("--llm_validation", action="store_true", help="Enable LLM validation (default: False)")
    parser.add_argument("--embedder_model", type=str, default='vertex', help="Embedder model name (default: 'vertex')")
    parser.add_argument("--sqlbuilder_model", type=str, default='gemini-1.5-pro', help="SQL builder model name (default: 'gemini-1.5-pro')")
    parser.add_argument("--sqlchecker_model", type=str, default='gemini-1.5-pro', help="SQL checker model name (default: 'gemini-1.5-pro')")
    parser.add_argument("--sqldebugger_model", type=str, default='gemini-1.5-pro', help="SQL debugger model name (default: 'gemini-1.5-pro')")
    parser.add_argument("--responder_model", type=str, default='gemini-1.5-pro', help="Responder model name (default: 'gemini-1.5-pro')")
    parser.add_argument("--num_table_matches", type=int, default=5, help="Number of table matches (default: 5)")
    parser.add_argument("--num_column_matches", type=int, default=10, help="Number of column matches (default: 10)")
    parser.add_argument("--table_similarity_threshold", type=float, default=0.1, help="Threshold for table similarity (default: 0.1)")
    parser.add_argument("--column_similarity_threshold", type=float, default=0.1, help="Threshold for column similarity (default: 0.1)")
    parser.add_argument("--example_similarity_threshold", type=float, default=0.1, help="Threshold for example similarity (default: 0.1)")
    parser.add_argument("--num_sql_matches", type=int, default=3, help="Number of SQL matches (default: 3)")

    args = parser.parse_args()

    # Use argument values in run_pipeline
    final_sql, response, _resp = asyncio.run(run_pipeline(args.session_id,
                                                          args.user_question,
                                                          args.user_grouping,
                                                          RUN_DEBUGGER=args.run_debugger,
                                                          EXECUTE_FINAL_SQL=args.execute_final_sql,
                                                          DEBUGGING_ROUNDS=args.debugging_rounds,
                                                          LLM_VALIDATION=args.llm_validation,
                                                          Embedder_model=args.embedder_model,
                                                          SQLBuilder_model=args.sqlbuilder_model,
                                                          SQLChecker_model=args.sqlchecker_model,
                                                          SQLDebugger_model=args.sqldebugger_model,
                                                          Responder_model=args.responder_model,
                                                          num_table_matches=args.num_table_matches,
                                                          num_column_matches=args.num_column_matches,
                                                          table_similarity_threshold=args.table_similarity_threshold,
                                                          column_similarity_threshold=args.column_similarity_threshold,
                                                          example_similarity_threshold=args.example_similarity_threshold,
                                                          num_sql_matches=args.num_sql_matches))

    # Older direct-call example (note that run_pipeline now expects a session_id
    # as its first argument, which this snippet predates):
    # user_question = "How many +18 movies have a rating above 4?"
    # final_sql, response, _resp = asyncio.run(run_pipeline(user_question,
    #                                                       'imdb',
    #                                                       RUN_DEBUGGER=True,
    #                                                       EXECUTE_FINAL_SQL=True,
    #                                                       DEBUGGING_ROUNDS=2,
    #                                                       LLM_VALIDATION=True,
    #                                                       Embedder_model='vertex',
    #                                                       SQLBuilder_model='gemini-1.0-pro',
    #                                                       SQLChecker_model='gemini-1.0-pro',
    #                                                       SQLDebugger_model='gemini-1.0-pro',
    #                                                       Responder_model='gemini-1.0-pro',
    #                                                       num_table_matches=5,
    #                                                       num_column_matches=10,
    #                                                       table_similarity_threshold=0.1,
    #                                                       column_similarity_threshold=0.1,
    #                                                       example_similarity_threshold=0.1,
    #                                                       num_sql_matches=3))

    print("*" * 50 + "\nGenerated SQL\n" + "*" * 50 + "\n" + final_sql)
    print("\n" + "*" * 50 + "\nResults\n" + "*" * 50)
    print(response)
    print("*" * 50 + "\nNatural Response\n" + "*" * 50 + "\n" + _resp)