Bugfix/ingest nested folders #13

Open
wants to merge 9 commits into base: main
61 changes: 35 additions & 26 deletions frontend/pages/index.tsx
@@ -1,9 +1,9 @@
"use client"; // This is a client component
"use client";

import React, { useEffect, useState, useContext } from 'react';
import React, { useEffect, useState } from 'react';
import PieChart from '../components/PieChart';
import { getGateUrl } from '../utils/getGateUrl';
import { fetchUser, fetchReadItemsByAttribute, fetchItems } from '../utils/api'
import { fetchUser, fetchReadItemsByAttribute, fetchItems } from '../utils/api';

interface Result {
answer: string;
@@ -21,13 +21,26 @@ interface Criterion {
gate: string;
}

// Map of gate identifiers to their human-readable format
const gateMap: { [key: string]: string } = {
'GATE_0': 'Gate 0',
'GATE_1': 'Gate 1',
'GATE_2': 'Gate 2',
'GATE_3': 'Gate 3',
'GATE_4': 'Gate 4',
'GATE_5': 'Gate 5'
};

const getBaseName = (name: string): string => name.split('-')[0];

const Summary: React.FC = () => {
const [chartData, setChartData] = useState<number[]>([]);
const [chartLabels, setChartLabels] = useState<string[]>([]);
const [summaryText, setSummaryText] = useState<string>('');
const [gateUrl, setGateUrl] = useState<string | null>(null);
const [categories, setCategories] = useState<{ [key: string]: number }>({});
const [projectDetails, setProjectDetails] = useState<any>(null);
const [reviewType, setReviewType] = useState<string>('');

useEffect(() => {
const fetchData = async () => {
@@ -44,6 +57,15 @@ const Summary: React.FC = () => {
};

const criteria = await Promise.all(results.map(fetchCriteria));

// Calculate review type based on unique gates
const uniqueGates = new Set(criteria.map(criterion => criterion.gate));
const formattedGates = Array.from(uniqueGates)
.map(gate => gateMap[gate] || gate)
.sort()
.join(', ');
setReviewType(formattedGates);

const fetchedCategories = criteria.map(criterion => criterion.category);
console.log('Criterion fetched:', fetchedCategories);

@@ -88,43 +110,30 @@ const Summary: React.FC = () => {
<br />
<div className="summary-card" style={{ display: 'flex', alignItems: 'center', marginBottom: '20px', marginTop: '40px' }}>
<div style={{ flex: 1 }}>
<h2>Welcome to <strong>Scout!</strong></h2>
<p>
{gateUrl && (
<>
This AI tool helps you navigate your document set before your review. Please check the details below are correct before continuing
<ul>
<strong>Review Type:</strong> {projectDetails.review_type} <br />
<strong>Project Name:</strong> {projectDetails.name}
</ul>
This tool has preprocessed your documents and analysed them against the questions in the
<a href={gateUrl} target="_blank" rel="noopener noreferrer">
{projectDetails.review_type} workbook
</a>.
</>
)}
Scout helps you navigate your document set before your review. Please check that the details below are correct before continuing.
<ul>
Project Name:<strong> {projectDetails.name ? getBaseName(projectDetails.name) : projectDetails.name}</strong><br />
Review Type:<strong> {reviewType} </strong>

</ul>
This tool has preprocessed your documents and analysed them against the questions in the {reviewType} workbook. Head to the Results tab to see the analysis for each criterion in the gate workbook, and click the file links to see what evidence was used to reach each conclusion.
</p>
</div>
</div>
</div>

<div className="summary-card" style={{ display: 'flex', alignItems: 'top', marginBottom: '20px' }}>
<div style={{ flex: 1 }}>
<h2>Review Summary</h2>
<p>{summaryText}</p>
<h2>Overview</h2>
<div dangerouslySetInnerHTML={{ __html: summaryText }} />
</div>
<div className="chart-container" style={{ flex: 1 }}>
<PieChart data={chartData} labels={chartLabels} />
</div>
</div>
{Object.keys(categories).map(category => (
<div className="summary-card" key={category} style={{ marginBottom: '20px' }}>
<h2>{category}</h2>
<p>{`Number of negative results: ${categories[category]}`}</p>
</div>
))}
</div>
);
};

export default Summary;
export default Summary;
57 changes: 36 additions & 21 deletions libreoffice_service/app.py
@@ -39,27 +39,42 @@ class HealthResponse(BaseModel):
status: bool


def transform_file_path(input_path: str) -> str:
# Split the path into directory and filename
directory, filename = os.path.split(input_path)

# Split the directory into parts
dir_parts = directory.split(os.sep)

# Remove the last directory and replace it with "processed"
if len(dir_parts) > 1:
dir_parts[-1] = "processed"
else:
dir_parts.append("processed")

# Join the directory parts back together
new_directory = os.sep.join(dir_parts)

# Get the filename without extension and add .pdf
new_filename = os.path.splitext(filename)[0] + ".pdf"

# Join the new directory and filename
return os.path.join(new_directory, new_filename)
def transform_file_path(input_path: str) -> str:
"""
Transform a file path by replacing 'raw' directory with 'processed' and changing extension to .pdf
while maintaining the rest of the directory structure.

Args:
input_path (str): Original file path (e.g., 'project/raw/subfolder/file.docx')

Returns:
str: Transformed file path (e.g., 'project/processed/subfolder/file.pdf')

Example:
>>> transform_file_path('example_project/raw/test_folder/document.docx')
'example_project/processed/test_folder/document.pdf'
"""
# Normalize path separators for the current OS
input_path = os.path.normpath(input_path)

# Split the path into parts
parts = input_path.split(os.sep)

# Find the 'raw' directory index
try:
raw_index = parts.index('raw')
except ValueError:
raise ValueError("Input path must contain a 'raw' directory")

# Replace 'raw' with 'processed'
parts[raw_index] = 'processed'

# Get the filename and change extension to .pdf
filename = os.path.splitext(parts[-1])[0] + '.pdf'
parts[-1] = filename

# Join all parts back together
return os.sep.join(parts)
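
# --- Illustrative sketch, not part of this diff: how the new transform behaves. ---
# Assumes POSIX-style separators; the sample paths below are hypothetical.
assert transform_file_path("example_project/raw/test_folder/document.docx") == \
    "example_project/processed/test_folder/document.pdf"

# Nested subfolders are preserved, which is the point of this bugfix:
assert transform_file_path("proj/raw/a/b/report.xlsx") == "proj/processed/a/b/report.pdf"

# A path without a 'raw' directory now raises instead of silently renaming its parent folder:
try:
    transform_file_path("proj/misc/file.docx")
except ValueError as exc:
    print(exc)  # Input path must contain a 'raw' directory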


@app.get("/health")
24 changes: 15 additions & 9 deletions scout/LLMFlag/evaluation.py
@@ -8,7 +8,7 @@
from openai import APIConnectionError, APIError, OpenAI, RateLimitError
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

from scout.DataIngest.models.schemas import Chunk, CriterionCreate, File, ProjectCreate, ResultCreate
from scout.DataIngest.models.schemas import Chunk, CriterionCreate, File, ProjectCreate, ProjectUpdate, ResultCreate
from scout.LLMFlag.prompts import (
CORE_SCOUT_PERSONA,
DOCUMENT_EXTRACT_PROMPT,
@@ -19,6 +19,7 @@
USER_EVIDENCE_POINTS_PROMPT,
USER_QUESTION_PROMPT,
USER_REGENERATE_HYPOTHESIS_PROMPT,
SUMMARIZE_RESPONSES_PROMPT,
)
from scout.LLMFlag.retriever import ReRankRetriever
from scout.utils.storage.storage_handler import BaseStorageHandler
@@ -49,6 +50,7 @@ class BaseEvaluator(ABC):
def __init__(self):
"""Initialise the evaluator"""
self.hypotheses = "None"
self.temp = 0

@abstractmethod
def evaluate_question(self, criteria_uuid: str) -> List[str]:
@@ -112,6 +114,7 @@ def answer_question(
)
evidence_response = api_call_with_retry(
self.llm.chat.completions.create,
temperature=self.temp,
model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
messages=[
{
@@ -139,6 +142,7 @@ def answer_question(

question_response = api_call_with_retry(
self.llm.chat.completions.create,
temperature=self.temp,
model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
messages=[
{"role": "system", "content": SYSTEM_QUESTION_PROMPT},
Expand All @@ -161,6 +165,7 @@ def answer_question(

hypotheses_response = api_call_with_retry(
self.llm.chat.completions.create,
temperature=0.5,
model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
messages=[
{"role": "system", "content": CORE_SCOUT_PERSONA},
@@ -199,7 +204,7 @@ def __init__(
storage_handler: BaseStorageHandler,
):
"""Initialise the evaluator"""
self.hypotheses = "None"
super().__init__()
self.vector_store = vector_store
self.llm = llm
self.storage_handler = storage_handler
@@ -237,23 +242,23 @@ def evaluate_questions(self, criteria: List[CriterionCreate], k: int = 3, save:
logger.info("Generating summary of answers...")
# Generate summary of answers
summary = self.generate_summary(question_answer_pairs)
self.project.results_summary = summary
self.storage_handler.update_item(self.project)
project_update = ProjectUpdate(id=self.project.id, name=self.project.name, results_summary=summary)
self.storage_handler.update_item(project_update)
return results

def generate_summary(self, question_answer_pairs: List[tuple]) -> str:
"""Generate a summary of the answers using an LLM, with an input prompt containing instructions."""

SUMMARIZE_RESPONSES_PROMPT = """You are a project delivery expert, you will be given question and answer pairs about a government project. Return a summary of the most important themes, you do not need to summarise all the questions, only return important, specific information. Be specific about project detail referred to. Return no more than 3 sentences. {qa_pairs}"""


formatted_input = ", ".join([f"Question: {qa[0]}\nAnswer: {qa[1]}" for qa in question_answer_pairs])
response = api_call_with_retry(
self.llm.chat.completions.create,
temperature=self.temp,
model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
messages=[
{
"role": "user",
"content": SUMMARIZE_RESPONSES_PROMPT.format(qa_pairs=formatted_input),
"content": SUMMARIZE_RESPONSES_PROMPT.format(qa_pairs=formatted_input,hypotheses=self.hypotheses),
},
],
)
@@ -270,13 +275,14 @@ def model(criterion: CriterionCreate, k: int = 3):
k=k,
)

extracted_words = re.findall(r"\b(positive|neutral|negative)\b", full_text, re.IGNORECASE)
# find any sentiment words that might be in square brackets
extracted_words = re.findall(r'\[?(positive|neutral|negative)\]?', full_text, re.IGNORECASE)

if extracted_words:
answer = extracted_words[-1].title()
# Remove the key words and brackets from full_text
full_text = re.sub(
r"\b(positive|neutral|negative)\b",
r'\[?(positive|neutral|negative)\]?',
"",
full_text,
flags=re.IGNORECASE,
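
# --- Illustrative sketch, not part of this diff: behaviour of the widened sentiment regex. ---
# The sample text below is hypothetical; brackets around the label are now stripped as well.
import re

full_text = "Milestones have slipped against the plan. Overall assessment: [Negative]"
extracted_words = re.findall(r'\[?(positive|neutral|negative)\]?', full_text, re.IGNORECASE)
print(extracted_words)               # ['Negative']
print(extracted_words[-1].title())   # Negative

cleaned = re.sub(r'\[?(positive|neutral|negative)\]?', "", full_text, flags=re.IGNORECASE)
print(cleaned)  # "Milestones have slipped against the plan. Overall assessment: "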
38 changes: 30 additions & 8 deletions scout/LLMFlag/prompts.py
@@ -161,17 +161,38 @@


#
# For section summaries
# For summaries
#
_summarise_core_prompt = """
You are an an expert project delivery adviser from the UK Government's Infrastructure and Projects Authority. \
You are given a list of questions that flag problems related to a project. \
Return a summary of the overall themes the questions are trying uncover. \
The summary must be at most 3 sentences long. \
Questions:
{questions}

_summarise_core_prompt = """
<start of task>
You will be given question and answer pairs about a government project. Return a summary of the most important themes and some lines of enquiry.
<end of task>
<start of rules>
You do not need to summarise all the questions, only return important, specific information.
Be specific about project detail referred to.
Return no more than 5 sentences.
Recommend at most 3 lines of enquiry at the end, and explicitly label them as suggested lines of enquiry.
Lines of enquiry are points to investigate further in interviews with the project team.
To form your lines of enquiry, use the hypotheses formed during the investigation.
Use simple HTML formatting tags: <strong> for bold and <br /> for line breaks between items and sections of your response.
There must be a line break before the suggested lines of enquiry section.
Do not use Markdown formatting.
Do not use any extra formatting for font type size or color or anything else.
Do not include a title in your answer.
<end of rules>
<start of hypotheses>
{hypotheses}
<end of hypotheses>
<start of question and answer pairs>
{qa_pairs}
<end of question and answer pairs>
"""

SUMMARIZE_RESPONSES_PROMPT = CORE_SCOUT_PERSONA + _summarise_core_prompt
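
# --- Illustrative sketch, not part of this diff: filling in the combined prompt. ---
# Assumes CORE_SCOUT_PERSONA contains no other format placeholders; the QA pair below is hypothetical.
qa_pairs = "Question: Is the delivery schedule realistic?\nAnswer: Negative - key milestones have already slipped."
message = SUMMARIZE_RESPONSES_PROMPT.format(qa_pairs=qa_pairs, hypotheses="None")
# 'message' is what MainEvaluator.generate_summary sends as the user message content.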



SUMMARISE_OUTPUTS_PROMPT = PromptTemplate.from_template(_summarise_core_prompt)


@@ -188,3 +209,4 @@
Extract:{text}
=======
"""

2 changes: 1 addition & 1 deletion scout/Pipelines/ingest_project_data.py
@@ -111,7 +111,7 @@ def ingest_project_files(
# Upload files to s3
s3_file_keys = s3_storage_handler.upload_folder_contents(
str(project_folder_path),
recursive=False,
recursive=True,
prefix=sanitise_project_name(project.name) + "/raw/",
)
logger.info(f"Uploaded {s3_file_keys} files to s3")