Skip to content

Commit

Permalink
Finetune: replace option 'expand_labels' by 'labels_mode'
Browse files Browse the repository at this point in the history
option 'labels_mode' takes 3 possible values:
- "expand" replaces expand_labels=true - append new vocab from the new data to the pretrained label encoders
- "skip" replaces expand_labels=false - only fit new tasks that haven't been pretrained
- "replace" fits a new MultiLabelEncoder (pretrained params will still be loaded for common vocab entries)
  • Loading branch information
OrianeN committed Apr 17, 2024
1 parent 5851cd2 commit f2bee55
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
12 changes: 8 additions & 4 deletions pie/default_settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
// general task schedule params (can be overwritten in the "settings" entry of each)
"patience": 100, // task patience (global early stopping patience for target task)
"factor": 1, // default task schedule factor
"threshold": 0, // default task schedule thresholed
"threshold": 0, // default task schedule threshold
"min_weight": 0, // default task schedule min_weight

// * Joint LM-loss
Expand All @@ -126,11 +126,15 @@
"load_pretrained_embeddings": "", // file with pretrained embeddings in word2vec format
"load_pretrained_encoder": "", // path to file with pretrained sentence encoder
"freeze_embeddings": false, // whether to freeze the word embeddings
"load_pretrained_model": {
"load_pretrained_model": { // config to load a pretrained model
"pretrained": "", // Path to the .tar file with the pretrained model
"exclude": [], // Modules to exclude from state_dict loading
"expand_labels": true // whether expand label lists (word/char + tasks vocabs)
}, // config to load a pretrained model
"labels_mode": "expand" // mode for creating the MultiLabelEncoder (word/char + tasks vocabs).
// Options = {"expand", "skip", "replace"}:
// - "expand": expand the existing pretrained label lists with new vocab from the fine-tuning data
// - "skip": only fit new encoders (for fine-tuning on a new task)
// - "replace": fit a new MultiLabelEncoder only with the fine-tuning data
},

// * Optimization
"dropout": 0.0, // dropout
Expand Down
12 changes: 8 additions & 4 deletions pie/scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,27 @@ def run(settings):
print()

# label encoder
if settings.load_pretrained_model.get("pretrained"):
labels_mode = settings.load_pretrained_model.get("labels_mode")
labels_mode_accepted = ["expand", "replace", "skip"]
assert labels_mode in labels_mode_accepted, \
f"Invalid value for labels_mode ({labels_mode}), accepted values are {labels_mode_accepted}"
if settings.load_pretrained_model.get("pretrained") and labels_mode != "replace":
label_encoder = MultiLabelEncoder.load_from_pretrained_model(
path=settings.load_pretrained_model["pretrained"],
new_settings=settings,
tasks=[t["name"] for t in settings.tasks]
)
if settings.load_pretrained_model.get("expand_labels") is True:
if settings.load_pretrained_model.get("labels_mode") == "expand":
if settings.verbose:
print("::: Fitting/Expanding MultiLabelEncoder with data :::")
print()
label_encoder.fit_reader(reader, expand_mode=True)
else:
else: # "skip"
if settings.verbose:
print("::: Fitting MultiLabelEncoder with data (unfitted LabelEncoders only) :::")
print()
label_encoder.fit_reader(reader, skip_fitted=True)
else:
else: # train from scratch or labels_mode== "replace"
label_encoder = MultiLabelEncoder.from_settings(settings, tasks=tasks)
if settings.verbose:
print("::: Fitting MultiLabelEncoder with data :::")
Expand Down

0 comments on commit f2bee55

Please sign in to comment.