Skip to content

Commit

Permalink
Merge pull request #74 from markmc/resolve-todos
Browse files (browse the repository at this point in the history)
Resolve some trivial TODOs in generate_data()
  • Loading branch information
russellb authored Jul 3, 2024
2 parents afbea4c + 51df195 commit bcb7974
Showing 1 changed file with 9 additions and 26 deletions.
35 changes: 9 additions & 26 deletions src/instructlab/sdg/generate_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -178,33 +178,25 @@ def _sdg_init(pipeline, client, model_family, model_name, num_iters, batched):
def generate_data(
logger,
api_base,
tls_insecure,
model_family: str,
yaml_rules: Optional[str] = None,
output_dir: Optional[str] = None,
taxonomy: Optional[str] = None,
taxonomy_base: Optional[str] = None,
# TODO - not used and should be removed from the CLI
prompt_file_path: Optional[str] = None,
api_key: Optional[str] = None,
model_family: Optional[str] = None,
model_name: Optional[str] = None,
# TODO - not used -- when batching is enabled, this is relevant.
# Right now the code hard codes 8 cpus for batching
num_cpus: Optional[int] = None,
num_instructions_to_generate: Optional[int] = 30,
# TODO - not used, can probably be removed
num_prompt_instructions=2,
# TODO - determine if this is relevant
request_batch_size=5,
# TODO - probably should be removed
temperature=1.0, # temperature per step is provided in the config file
# TODO - probably should be removed
top_p=1.0,
taxonomy: Optional[str] = None,
taxonomy_base: Optional[str] = None,
output_dir: Optional[str] = None,
# TODO - not used and should be removed from the CLI
prompt_file_path: Optional[str] = None,
# TODO - probably should be removed
rouge_threshold: Optional[float] = None,
console_output=True,
api_key: Optional[str] = None,
yaml_rules: Optional[str] = None,
chunk_word_count=None,
server_ctx_size=None,
tls_insecure=False,
tls_client_cert: Optional[str] = None,
tls_client_key: Optional[str] = None,
tls_client_passwd: Optional[str] = None,
Expand Down Expand Up @@ -283,15 +275,6 @@ def generate_data(
else:
sdg = sdg_freeform_skill

if not sdg:
# TODO - can be removed once the "full" pipelines are all defined,
# as there shouldn't be a code path to get here anymore
raise utils.GenerateException(
"Error: No SDG pipeline for this leaf node type: %s" % samples[0]
)

# TODO -- there is a parameter for how many samples to generate, but we ignore it so far

logger.debug("Samples: %s" % samples)
ds = Dataset.from_list(samples)
logger.debug("Dataset: %s" % ds)
Expand Down

0 comments on commit bcb7974

Please sign in to comment.