instructlab · markmc · Jul 5, 2024 · Jul 2, 2024 · Jul 10, 2024 · Jul 11, 2024
diff --git a/scripts/test_freeform_skills.py b/scripts/test_freeform_skills.py
@@ -5,7 +5,7 @@
 # First Party
 from src.instructlab.sdg import SDG
 from src.instructlab.sdg.default_flows import SynthSkillsFlow
-from src.instructlab.sdg.pipeline import Pipeline
+from src.instructlab.sdg.pipeline import Pipeline, PipelineContext
 
 # for vLLM endpoints, the api_key remains "EMPTY"
 openai_api_key = "EMPTY"
@@ -49,7 +49,9 @@
 
 ds = Dataset.from_list(samples)
 
-skills_flow = SynthSkillsFlow(client, "mixtral", teacher_model, 1).get_flow()
+ctx = PipelineContext(client, "mixtral", teacher_model, 1)
+
+skills_flow = SynthSkillsFlow(ctx).get_flow()
 skills_pipe = Pipeline(skills_flow)
 
 sdg = SDG([skills_pipe])

diff --git a/scripts/test_grounded_skills.py b/scripts/test_grounded_skills.py
@@ -5,7 +5,7 @@
 # First Party
 from src.instructlab.sdg import SDG
 from src.instructlab.sdg.default_flows import SynthGroundedSkillsFlow
-from src.instructlab.sdg.pipeline import Pipeline
+from src.instructlab.sdg.pipeline import Pipeline, PipelineContext
 
 # for vLLM endpoints, the api_key remains "EMPTY"
 openai_api_key = "EMPTY"
@@ -97,7 +97,9 @@
 
 ds = Dataset.from_list(samples)
 
-skills_flow = SynthGroundedSkillsFlow(client, "mixtral", teacher_model, 10).get_flow()
+ctx = PipelineContext(client, "mixtral", teacher_model, 10)
+
+skills_flow = SynthGroundedSkillsFlow(ctx).get_flow()
 skills_pipe = Pipeline(skills_flow)
 
 sdg = SDG([skills_pipe])

diff --git a/scripts/test_knowledge.py b/scripts/test_knowledge.py
@@ -8,7 +8,7 @@
 # First Party
 from src.instructlab.sdg import SDG
 from src.instructlab.sdg.default_flows import MMLUBenchFlow, SynthKnowledgeFlow
-from src.instructlab.sdg.pipeline import Pipeline
+from src.instructlab.sdg.pipeline import Pipeline, PipelineContext
 
 # Please don't add you vLLM endpoint key here
 openai_api_key = "EMPTY"
@@ -38,12 +38,13 @@
 
 ds = Dataset.from_list(samples)
 
-mmlu_flow = MMLUBenchFlow(client, "mixtral", teacher_model, 1).get_flow()
-knowledge_flow = SynthKnowledgeFlow(client, "mixtral", teacher_model, 1).get_flow()
-knowledge_pipe = Pipeline(knowledge_flow)
-mmlu_pipe = Pipeline(mmlu_flow)
+ctx = PipelineContext(client, "mixtral", teacher_model, 1)
 
-sdg = SDG([mmlu_pipe, knowledge_pipe])
+mmlu_flow = MMLUBenchFlow(ctx).get_flow()
+knowledge_flow = SynthKnowledgeFlow(ctx).get_flow()
+knowledge_pipe = Pipeline(mmlu_flow + knowledge_flow)
+
+sdg = SDG([knowledge_pipe])
 mmlubench_data = sdg.generate(ds)
 
 print(mmlubench_data)

diff --git a/src/instructlab/sdg/block.py b/src/instructlab/sdg/block.py
@@ -3,6 +3,7 @@
 from abc import ABC
 from collections import ChainMap
 from typing import Any, Dict, Union
+import os.path
 
 # Third Party
 import yaml
@@ -14,7 +15,8 @@
 
 
 class Block(ABC):
-    def __init__(self, block_name: str) -> None:
+    def __init__(self, ctx, block_name: str) -> None:
+        self.ctx = ctx
         self.block_name = block_name
 
     @staticmethod
@@ -41,8 +43,13 @@ def _load_config(self, config_path: str) -> Union[Dict[str, Any], None]:
         """
         Load the configuration file for this block.
 
+        If the supplied configuration file is a relative path, it is assumed
+        to be part of this Python package.
+
         :param config_path: The path to the configuration file.
         :return: The loaded configuration.
         """
+        if not os.path.isabs(config_path):
+            config_path = os.path.join(self.ctx.sdg_base, config_path)
         with open(config_path, "r", encoding="utf-8") as config_file:
             return yaml.safe_load(config_file)