Skip to content

Commit

Permalink
Merge pull request #86 from markmc/flow-config-file-format
Browse files Browse the repository at this point in the history
Add a YAML based file format for pipelines
  • Loading branch information
russellb authored Jul 13, 2024
2 parents bcf1070 + 2c52770 commit 07a17ed
Show file tree
Hide file tree
Showing 23 changed files with 869 additions and 617 deletions.
16 changes: 12 additions & 4 deletions scripts/test_freeform_skills.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# Standard
from importlib import resources

# Third Party
from datasets import Dataset
from openai import OpenAI

# First Party
from src.instructlab.sdg import SDG
from src.instructlab.sdg.default_flows import SynthSkillsFlow
from src.instructlab.sdg.pipeline import Pipeline
from src.instructlab.sdg.pipeline import (
FULL_PIPELINES_PACKAGE,
Pipeline,
PipelineContext,
)

# for vLLM endpoints, the api_key remains "EMPTY"
openai_api_key = "EMPTY"
Expand Down Expand Up @@ -49,8 +55,10 @@

ds = Dataset.from_list(samples)

skills_flow = SynthSkillsFlow(client, "mixtral", teacher_model, 1).get_flow()
skills_pipe = Pipeline(skills_flow)
ctx = PipelineContext(client, "mixtral", teacher_model, 1)

with resources.path(FULL_PIPELINES_PACKAGE, "freeform_skills.yaml") as yaml_path:
skills_pipe = Pipeline.from_file(ctx, yaml_path)

sdg = SDG([skills_pipe])
gen_data = sdg.generate(ds)
Expand Down
16 changes: 12 additions & 4 deletions scripts/test_grounded_skills.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# Standard
from importlib import resources

# Third Party
from datasets import Dataset
from openai import OpenAI

# First Party
from src.instructlab.sdg import SDG
from src.instructlab.sdg.default_flows import SynthGroundedSkillsFlow
from src.instructlab.sdg.pipeline import Pipeline
from src.instructlab.sdg.pipeline import (
FULL_PIPELINES_PACKAGE,
Pipeline,
PipelineContext,
)

# for vLLM endpoints, the api_key remains "EMPTY"
openai_api_key = "EMPTY"
Expand Down Expand Up @@ -97,8 +103,10 @@

ds = Dataset.from_list(samples)

skills_flow = SynthGroundedSkillsFlow(client, "mixtral", teacher_model, 10).get_flow()
skills_pipe = Pipeline(skills_flow)
ctx = PipelineContext(client, "mixtral", teacher_model, 10)

with resources.path(FULL_PIPELINES_PACKAGE, "grounded_skills.yaml") as yaml_path:
skills_pipe = Pipeline.from_file(ctx, yaml_path)

sdg = SDG([skills_pipe])
gen_data = sdg.generate(ds)
Expand Down
18 changes: 11 additions & 7 deletions scripts/test_knowledge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Standard
from importlib import resources
import operator

# Third Party
Expand All @@ -7,8 +8,11 @@

# First Party
from src.instructlab.sdg import SDG
from src.instructlab.sdg.default_flows import MMLUBenchFlow, SynthKnowledgeFlow
from src.instructlab.sdg.pipeline import Pipeline
from src.instructlab.sdg.pipeline import (
FULL_PIPELINES_PACKAGE,
Pipeline,
PipelineContext,
)

# Please don't add your vLLM endpoint key here
openai_api_key = "EMPTY"
Expand Down Expand Up @@ -38,12 +42,12 @@

ds = Dataset.from_list(samples)

mmlu_flow = MMLUBenchFlow(client, "mixtral", teacher_model, 1).get_flow()
knowledge_flow = SynthKnowledgeFlow(client, "mixtral", teacher_model, 1).get_flow()
knowledge_pipe = Pipeline(knowledge_flow)
mmlu_pipe = Pipeline(mmlu_flow)
ctx = PipelineContext(client, "mixtral", teacher_model, 1)

with resources.path(FULL_PIPELINES_PACKAGE, "knowledge.yaml") as yaml_path:
knowledge_pipe = Pipeline.from_file(ctx, yaml_path)

sdg = SDG([mmlu_pipe, knowledge_pipe])
sdg = SDG([knowledge_pipe])
mmlubench_data = sdg.generate(ds)

print(mmlubench_data)
Expand Down
12 changes: 11 additions & 1 deletion src/instructlab/sdg/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from abc import ABC
from collections import ChainMap
from typing import Any, Dict, Union
import os.path

# Third Party
import yaml
Expand All @@ -14,7 +15,9 @@


class Block(ABC):
def __init__(self, block_name: str) -> None:
def __init__(self, ctx, pipe, block_name: str) -> None:
self.ctx = ctx
self.pipe = pipe
self.block_name = block_name

@staticmethod
Expand All @@ -41,8 +44,15 @@ def _load_config(self, config_path: str) -> Union[Dict[str, Any], None]:
"""
Load the configuration file for this block.
If the supplied configuration file is a relative path, it is resolved
relative to the directory containing the pipeline's own configuration
file (``self.pipe.config_path``).
:param config_path: The path to the configuration file.
:return: The loaded configuration.
"""
if not os.path.isabs(config_path):
config_path = os.path.join(
os.path.dirname(self.pipe.config_path), config_path
)
with open(config_path, "r", encoding="utf-8") as config_file:
return yaml.safe_load(config_file)
Loading

0 comments on commit 07a17ed

Please sign in to comment.