Commit
Showing 7 changed files with 56 additions and 23 deletions.
Dockerfile
@@ -1,25 +1,22 @@
-# Before you create the docker image from this file, you must have either downloaded the llama-2-7b-chat.Q2_K.gguf file
-# manually yourself, or run the server at least one time.
-
 # Use python as base image
 FROM python
 
 # Set the working directory in the container
 WORKDIR /app
 
-# Copy the current directory contents into the container at /app
-COPY ./llama_cpu_server.py /app/llama_cpu_server.py
-COPY ./llama-2-7b-chat.Q2_K.gguf /app/llama-2-7b-chat.Q2_K.gguf
-COPY ./requirements.txt /app/requirements.txt
+# Copy only the necessary files
+COPY llama_cpu_server.py .
+COPY requirements.txt .
 
 # Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
+# Check if the model file exists, and if not, download it using the provided function
+RUN python -c "from llama_cpu_server import download_model; download_model('llama-2-7b-chat.Q2_K.gguf', 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf?download=true')"
 
 # Expose port 5000 outside of the container
 EXPOSE 5000
 
 # Run llama_cpu_server.py when the container launches
 CMD ["python", "llama_cpu_server.py"]
 
 # Run to build the image:
 # docker build -t llama-cpu-server .
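The download_model helper invoked by the RUN instruction is defined in llama_cpu_server.py, which is not part of this diff; only its call signature, download_model(filename, url), is visible here. A minimal sketch of what such a function might look like, assuming the requests package is listed in requirements.txt:

# Hypothetical sketch of download_model; the real implementation lives in
# llama_cpu_server.py and may differ.
import os
import requests

def download_model(filename: str, url: str) -> None:
    """Download the GGUF model to filename unless it is already present."""
    if os.path.exists(filename):
        print(f"{filename} already exists, skipping download")
        return
    # Stream the response so the multi-gigabyte model file is written to
    # disk in chunks instead of being buffered in memory.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"Downloaded {filename}")

Running the download at build time means the model is baked into the image once, so the old requirement of manually downloading the file (or starting the server once) before building goes away.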
docker-compose.yml (new file)
@@ -0,0 +1,11 @@
+version: '3'
+
+services:
+  llama-cpu-server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "5000:5000"
+    volumes:
+      - ./concept_linking/tools/LlamaServer/llama-2-7b-chat.Q2_K.gguf:/app/concept_linking/tools/LlamaServer/llama-2-7b-chat.Q2_K.gguf
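Mounting the model file as a volume keeps the multi-gigabyte weights on the host, so they survive container rebuilds rather than being re-downloaded each time. Assuming this compose file sits at the repository root, as the volume's host path suggests, the server can be built and started with: docker compose up --build, which maps port 5000 in the container to port 5000 on the host.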