Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes #38

v3 includes some backwards-incompatible changes to the knowledge schema format. Here is a diff against v2. The changes are:

- Q&A pairs now have an associated context blob from the knowledge document.
- There is a new "document_outline" field.

```diff
--- src/instructlab/schema/v2/knowledge.json 2024-07-17 12:56:37
+++ src/instructlab/schema/v3/knowledge.json 2024-07-17 13:14:56
@@ -8,7 +8,8 @@
     "domain",
     "task_description",
     "seed_examples",
-    "document"
+    "document",
+    "document_outline"
   ],
   "unevaluatedProperties": false,
   "properties": {
@@ -44,20 +45,37 @@
       "items": {
         "type": "object",
         "required": [
-          "question",
-          "answer"
+          "context",
+          "questions_and_answers"
         ],
         "unevaluatedProperties": false,
         "properties": {
-          "question": {
-            "description": "A question used for synthetic data generation.",
+          "context": {
+            "description": "A context used for synthetic data generation.",
             "type": "string",
             "minLength": 1
           },
-          "answer": {
-            "description": "The desired response for the question.",
-            "type": "string",
-            "minLength": 1
+          "questions_and_answers": {
+            "type": "array",
+            "minItems": 3,
+            "uniqueItems": true,
+            "items": {
+              "type": "object",
+              "required": [
+                "question",
+                "answer"
+              ],
+              "properties": {
+                "question": {
+                  "description": "A question used for synthetic data generation.",
+                  "type": "string",
+                  "minLength": 1
+                },
+                "answer": {
+                  "description": "The desired response for the question.",
+                  "type": "string",
+                  "minLength": 1
+                }
+              }
+            }
           }
         }
       }
@@ -104,6 +122,11 @@
           }
         }
       }
+    },
+    "document_outline": {
+      "description": "An outline of the document.",
+      "type": "string",
+      "minLength": 1
     }
   }
 }
```

Signed-off-by: Russell Bryant <[email protected]>
- Loading branch information