Skip to content

Commit

Permalink
Merge pull request #53 from oindrillac/grounded
Browse files Browse the repository at this point in the history
updates to grounded flow
  • Loading branch information
russellb authored Jul 2, 2024
2 parents 45ecc73 + 1f89b1a commit 2788384
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 18 deletions.
2 changes: 1 addition & 1 deletion scripts/test_freeform_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

ds = Dataset.from_list(samples)

skills_flow = SynthSkillsFlow(client, teacher_model).get_flow()
skills_flow = SynthSkillsFlow(client, "mixtral", teacher_model, 1).get_flow()
skills_pipe = Pipeline(skills_flow)

sdg = SDG([skills_pipe])
Expand Down
107 changes: 107 additions & 0 deletions scripts/test_grounded_skills.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Third Party
from datasets import Dataset
from openai import OpenAI

# First Party
from src.instructlab.sdg import SDG
from src.instructlab.sdg.default_flows import SynthGroundedSkillsFlow
from src.instructlab.sdg.pipeline import Pipeline

# Connection settings for the OpenAI-compatible endpoint.
# For vLLM endpoints the api_key is left as the literal string "EMPTY".
openai_api_key = "EMPTY"
# Placeholder value: replace with the model endpoint before running the script.
openai_api_base = "Add model endpoint here"


client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

# The first model listed by the endpoint is used as the teacher model.
models = client.models.list()
teacher_model = models.data[0].id

# Seed example for the grounded-skills flow: a meeting transcript (context),
# the instruction to apply to it (question), a short task description, and a
# reference answer (response).
#
# NOTE: the text below is plain Python string content. The earlier version was
# pasted from YAML and still carried YAML-only escapes (doubled single quotes
# such as "we''re", and "\ " escaped-space line continuations), which Python
# does not interpret and which therefore leaked stray quote/backslash
# characters into the seed data.
samples = [
    {
        "seed_context": """*Ms. Thompson:* Good morning, everyone. Today, we're here to discuss
our customer journey mapping and analysis. I believe this is crucial to understanding
our customers' experiences and improving our services.
*Mr. Patel:* I agree, Lisa. We should start by identifying all touchpoints in
our customer journey, from initial contact to post-sale support.
*Ms. Rodriguez:* Yes, and let's not forget about the emotional aspect of the
journey. How do our customers feel at each stage? What are their pain points?
*Mr. Kim:* We can use data from our CRM system to track the customer journey and
gather insights. This will help us create a detailed, data-driven map.
*Ms. Johnson:* Once we have the map, we can analyze it to identify areas for improvement.
Perhaps there are steps where customers drop off or express dissatisfaction.
*Mr. Davis:* We should also consider the customer's perspective. Conducting interviews
or surveys can provide valuable insights into their thoughts and feelings.
*Ms. Brown:* Absolutely. And once we've identified areas for improvement, we
can develop strategies to address them. This might involve redesigning certain
touchpoints, enhancing our communication, or streamlining processes.
*Mr. Smith:* And we must remember to measure the impact of any changes we make.
By tracking key performance indicators, we can determine whether our efforts are
successful.
*Ms. Thompson:* Great suggestions, everyone. Let's divide into teams to tackle
different aspects of the customer journey. Team 1 will focus on pre-sale touchpoints,
Team 2 on sales, and Team 3 on post-sale support.
*Mr. Patel:* Sounds good. We'll reconvene in a week to share our findings and
discuss next steps.""",
        "seed_question": """Generate a company wide email based on the given meeting transcript""",
        "task_description": "Writing emails based on minutes of meeting",
        # Built from adjacent string literals so that every line break in the
        # email is an explicit "\n" and no escape sequence is left ambiguous.
        "seed_response": (
            "Subject: Recap and Next Steps - Customer Journey Mapping and Analysis\n"
            "\nDear [Company Name] Team,\n\nI hope this email finds you well. Yesterday, we"
            " gathered to discuss our customer journey mapping and analysis, aiming to understand"
            " our customers' experiences and identify opportunities for improvement. The discussions"
            " were fruitful, and I want to share a brief overview of the key points and outcomes.\n"
            "\n1. **Identifying Touchpoints:**\n Mr. Patel suggested mapping all touchpoints"
            " in our customer journey, from initial contact to post-sale support.\n\n2. **Emotional"
            " Aspect and Pain Points:**\n Ms. Rodriguez emphasized the importance of considering"
            " the emotional aspect of the journey and identifying customers' pain points at"
            " each stage.\n\n3. **Data-Driven Mapping:**\n Mr. Kim proposed using data from"
            " our CRM system to create a detailed, data-driven customer journey map.\n\n4."
            " **Customer Perspective:**\n Ms. Johnson recommended gathering insights from"
            " the customer's perspective through interviews or surveys.\n\n5. **Analysis and"
            " Improvement:**\n Ms. Brown suggested analyzing the customer journey map to"
            " identify areas for improvement and developing strategies to address them.\n\n"
            "6. **Measuring Impact:**\n Mr. Smith stressed the need to measure the impact"
            " of any changes made by tracking key performance indicators.\n\nTo facilitate"
            " a comprehensive analysis, we have divided into teams to tackle different aspects"
            " of the customer journey:\n\n* Team 1: Pre-sale touchpoints\n* Team 2: Sales\n"
            "* Team 3: Post-sale support\n\nEach team will share their findings and discuss"
            " next steps in a week.\n\nYour engagement and insights have been invaluable in"
            " understanding our customers' experiences and identifying opportunities for improvement."
            " I look forward to our continued collaboration as we work towards enhancing our"
            " services and delivering exceptional customer experiences.\n\nBest regards,\n"
            "\n[Your Full Name]\n[Your Position]\n[Company Name]"
        ),
    }
]


# Wrap the seed sample(s) in a Dataset to feed into the generation run.
ds = Dataset.from_list(samples)

# Build the grounded-skills flow for the "mixtral" model family, using the
# teacher model discovered from the endpoint.
# NOTE(review): the trailing 10 is passed positionally; its meaning is defined
# by the flow constructor, which is not visible here -- confirm before changing.
flow_builder = SynthGroundedSkillsFlow(client, "mixtral", teacher_model, 10)
skills_flow = flow_builder.get_flow()
skills_pipe = Pipeline(skills_flow)

# Run synthetic data generation with the single grounded-skills pipeline.
sdg = SDG([skills_pipe])
gen_data = sdg.generate(ds)

# Show the generated data as a whole, then its first record.
print(gen_data)
print(gen_data[0])
4 changes: 2 additions & 2 deletions scripts/test_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@

ds = Dataset.from_list(samples)

mmlu_flow = MMLUBenchFlow(client, teacher_model).get_flow()
knowledge_flow = SynthKnowledgeFlow(client, teacher_model).get_flow()
mmlu_flow = MMLUBenchFlow(client, "mixtral", teacher_model, 1).get_flow()
knowledge_flow = SynthKnowledgeFlow(client, "mixtral", teacher_model, 1).get_flow()
knowledge_pipe = Pipeline(knowledge_flow)
mmlu_pipe = Pipeline(mmlu_flow)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ examples: |
[End of Score]
generation: |
Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above.
Here's the context, question and the answer you need to evaluate:
[Start of Context]
Expand All @@ -45,7 +46,6 @@ generation: |
{answer}
[End of Answer]
Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above.
* Return the evaluation between [Start of Evaluation] and [End of Evaluation] tags.
* Return the score between [Start of Score] and [End of Score] tags.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ examples: |
[End of Score]
generation: |
Here's the context and question you need to evaluate:
Here's the context and question you need to evaluate. Return the evaluation between [Start of Evaluation] and [End of Evaluation] tags.
[Start of Context]
{context}
Expand Down
4 changes: 2 additions & 2 deletions src/instructlab/sdg/configs/skills/freeform_responses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ examples: |
[End of Response]
generation: |
Now generate a response to the following prompt.
Now generate a response to the following prompt. Remember to use the same style and format as the example above.
[Start of Question]
{question}
[End of Question]
Remember to use the same style and format as the example above. Return the response between [Start of Response] and [End of Response] tags.
Return the response between [Start of Response] and [End of Response] tags.
start_tags: ["[Start of Response]"]
end_tags: ["[End of Response]"]
5 changes: 4 additions & 1 deletion src/instructlab/sdg/configs/skills/grounded_responses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ examples: |
[End of Response]
generation: |
Now generate a response to the following prompt. Remember to use the same style and format as the example above. Return the response between [Start of Response] and [End of Response] tags.
Now generate a response to the following prompt. Remember to use the same style and format as the example above.
Return the response between [Start of Response] and [End of Response] tags.
[Start of Context]
{context}
Expand All @@ -35,6 +36,8 @@ generation: |
{question}
[End of Question]
Return the response between [Start of Response] and [End of Response] tags.
start_tags: ["[Start of Response]"]
end_tags: ["[End of Response]"]
36 changes: 26 additions & 10 deletions src/instructlab/sdg/default_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .filterblock import FilterByValueBlock
from .iterblock import IterBlock
from .llmblock import LLMBlock
from .utilblocks import CombineColumnsBlock

MODEL_FAMILY_MIXTRAL = "mixtral"
MODEL_FAMILY_MERLINITE = "merlinite"
Expand Down Expand Up @@ -225,8 +226,9 @@ def get_flow(self) -> list:
"block_config": {
"block_name": "filter_relevancy",
"filter_column": "score",
"filter_value": "2",
"filter_value": 2.0,
"operation": operator.eq,
"convert_dtype": float,
"batch_kwargs": {
"num_procs": 8,
},
Expand Down Expand Up @@ -258,8 +260,9 @@ def get_flow(self) -> list:
"block_config": {
"block_name": "filter_verify_question",
"filter_column": "rating",
"filter_value": "1",
"filter_value": 1.0,
"operation": operator.eq,
"convert_dtype": float,
"batch_kwargs": {
"num_procs": 8,
},
Expand Down Expand Up @@ -309,9 +312,9 @@ def get_flow(self) -> list:
"block_config": {
"block_name": "filter_questions",
"filter_column": "score",
"filter_value": 1,
"filter_value": 1.0,
"operation": operator.eq,
"convert_dtype": int,
"convert_dtype": float,
"batch_kwargs": {
"num_procs": 8,
},
Expand Down Expand Up @@ -353,9 +356,9 @@ def get_flow(self) -> list:
"block_config": {
"block_name": "filter_qa_pair",
"filter_column": "score",
"filter_value": 2,
"filter_value": 2.0,
"operation": operator.ge,
"convert_dtype": int,
"convert_dtype": float,
"batch_kwargs": {
"num_procs": 8,
},
Expand Down Expand Up @@ -420,6 +423,7 @@ def get_flow(self) -> list:
"batch_kwargs": {
"num_procs": 8,
"batched": self.batched,
"num_samples": 10,
},
},
},
Expand All @@ -428,9 +432,9 @@ def get_flow(self) -> list:
"block_config": {
"block_name": "filter_grounded_questions",
"filter_column": "score",
"filter_value": 1,
"filter_value": 1.0,
"operation": operator.eq,
"convert_dtype": int,
"convert_dtype": float,
"batch_kwargs": {
"num_procs": 8,
},
Expand Down Expand Up @@ -472,12 +476,24 @@ def get_flow(self) -> list:
"block_config": {
"block_name": "filter_grounded_qa_pair",
"filter_column": "score",
"filter_value": 2,
"filter_value": 2.0,
"operation": operator.ge,
"convert_dtype": int,
"convert_dtype": float,
"batch_kwargs": {
"num_procs": 8,
},
},
},
{
"block_type": CombineColumnsBlock,
"block_config": {
"block_name": "combine_question_and_context",
"columns": ["context", "question"],
"output_col": "question",
"batch_kwargs": {
"num_procs": 8,
"batched": True,
},
},
},
]

0 comments on commit 2788384

Please sign in to comment.