From d1147947c061964ff3dd69b7acda67c9269c6a1b Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Wed, 3 Aug 2022 16:25:47 +0800 Subject: [PATCH 01/20] [#57] Add momentum model and config and train script --- sgnlp/models/coherence_momentum/__init__.py | 0 sgnlp/models/coherence_momentum/config.py | 42 ++ sgnlp/models/coherence_momentum/modeling.py | 111 +++++ sgnlp/models/coherence_momentum/train.py | 381 ++++++++++++++++++ .../models/coherence_momentum/train_config.py | 18 + 5 files changed, 552 insertions(+) create mode 100644 sgnlp/models/coherence_momentum/__init__.py create mode 100755 sgnlp/models/coherence_momentum/config.py create mode 100644 sgnlp/models/coherence_momentum/modeling.py create mode 100755 sgnlp/models/coherence_momentum/train.py create mode 100644 sgnlp/models/coherence_momentum/train_config.py diff --git a/sgnlp/models/coherence_momentum/__init__.py b/sgnlp/models/coherence_momentum/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sgnlp/models/coherence_momentum/config.py b/sgnlp/models/coherence_momentum/config.py new file mode 100755 index 0000000..9c1b8d3 --- /dev/null +++ b/sgnlp/models/coherence_momentum/config.py @@ -0,0 +1,42 @@ +from transformers import PretrainedConfig + + +class CoherenceConfig(PretrainedConfig): + model_type = "general_coherence_model" + + def __init__( + self, + model_size: str = "base", + lr_start: float = 5e-06, + lr_end: float = 1e-06, + lr_anneal_epochs: int = 50, + eval_interval: int = 1000, + seed: int = 100, + batch_size: int = 1, + margin: float = 0.1, + num_negs: int = 5, + max_len: int = 600, + num_rank_negs: int = 50, + train_steps: int = 200, + momentum_coefficient: float = 0.9999999, + queue_size: int = 1000, + contrastive_loss_weight: float = 0.85, + **kwargs + ): + super().__init__(**kwargs) + + self.model_size = model_size + self.lr_start = lr_start + self.lr_end = lr_end + self.lr_anneal_epochs = lr_anneal_epochs + self.eval_interval = eval_interval + self.seed = seed + self.batch_size = batch_size + self.margin = margin + self.num_negs = num_negs + self.max_len = max_len + self.num_rank_negs = num_rank_negs + self.train_steps = train_steps + self.momentum_coefficient = momentum_coefficient + self.queue_size = queue_size + self.contrastive_loss_weight = contrastive_loss_weight diff --git a/sgnlp/models/coherence_momentum/modeling.py b/sgnlp/models/coherence_momentum/modeling.py new file mode 100644 index 0000000..54273dd --- /dev/null +++ b/sgnlp/models/coherence_momentum/modeling.py @@ -0,0 +1,111 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import XLNetModel, XLNetConfig +from transformers import PreTrainedModel +from .config import CoherenceConfig + + +class MomentumPreTrainedModel(PreTrainedModel): + config_class = CoherenceConfig + base_model_prefix = "momentum" + + +class MomentumModel(PreTrainedModel): + def __init__(self, config): + + super().__init__(config) + self.batch_size = config.batch_size + self.momentum_coefficient = config.momentum_coefficient + + self.encoder_name = f"xlnet-{config.model_size}-cased" + self.encoder_config = XLNetConfig.from_pretrained(self.encoder_name) + self.main_encoder = XLNetModel(self.encoder_config) + self.momentum_encoder = XLNetModel(self.encoder_config) + + if config.model_size == "base": + hidden_size = 768 + elif config.model_size == "large": + hidden_size = 1024 + + self.queue = [] + self.queue_size = config.queue_size + self.con_loss_weight = config.contrastive_loss_weight + self.negs = config.num_negs + 
self.margin = config.margin + self.cosim = nn.CosineSimilarity() + self.crossEntropy = nn.CrossEntropyLoss() + self.getTranspose = lambda x: torch.transpose(x, -2, -1) + self.subMargin = lambda z: z - config.margin + + self.conlinear = nn.Linear(hidden_size, 1) + + def init_encoders(self): + self.main_encoder = XLNetModel.from_pretrained(self.encoder_name) + self.momentum_encoder = XLNetModel.from_pretrained(self.encoder_name) + + def get_main_score(self, doc): + rep = self.main_encoder(input_ids=doc).last_hidden_state[:, -1, :] + score = self.conlinear(rep).view(-1) + return score + + def get_momentum_rep(self, doc): + rep = self.momentum_encoder(input_ids=doc).last_hidden_state[:, -1, :] + return rep.detach() + + def get_cos_sim(self, pos_rep, pos_slice): + pos_sim = self.cosim(pos_rep, pos_slice) + neg_sims = [self.cosim(pos_rep, neg_x.view(1, -1)) for neg_x in self.queue] + return pos_sim, neg_sims + + def update_momentum_encoder(self): + with torch.no_grad(): + for main, moco in zip( + self.main_encoder.parameters(), self.momentum_encoder.parameters() + ): + moco.data = (moco.data * self.momentum_coefficient) + ( + main.data * (1 - self.momentum_coefficient) + ) + + def forward(self, pos_doc, pos_slice, neg_docs): + pos_rep = self.main_encoder(input_ids=pos_doc).last_hidden_state[:, -1, :] + pos_score = self.conlinear(pos_rep).view(-1) + + pos_slice_rep = self.get_momentum_rep(pos_slice) + + neg_scores = list(map(self.get_main_score, list(neg_docs))) + neg_moco_rep = list(map(self.get_momentum_rep, list(neg_docs))) + + if len(self.queue) >= self.queue_size: # global negative queue size + del self.queue[: self.negs] + self.queue.extend(neg_moco_rep[0]) + + pos_sim, neg_sims = self.get_cos_sim(pos_rep, pos_slice_rep) + + sim_contra_loss = self.sim_contrastive_loss(pos_sim, neg_sims) + contra_loss = self.contrastive_loss(pos_score, neg_scores[0]) + + full_loss = (self.con_loss_weight * contra_loss) + ( + (1 - self.con_loss_weight) * sim_contra_loss + ) + + return full_loss + + def eval_forward(self, pos_doc, neg_docs): + pos_score = self.get_main_score(pos_doc) + neg_scores = torch.stack(list(map(self.get_main_score, list(neg_docs)))) + return pos_score.detach(), neg_scores[0].detach() + + def sim_contrastive_loss(self, pos_sim, neg_sims): + neg_sims_sub = torch.stack(list(map(self.subMargin, neg_sims))).view(-1) + all_sims = torch.cat((neg_sims_sub, pos_sim), dim=-1) + lsmax = -1 * F.log_softmax(all_sims, dim=-1) + loss = lsmax[-1] + return loss + + def contrastive_loss(self, pos_score, neg_scores): + neg_scores_sub = torch.stack(list(map(self.subMargin, neg_scores))) + all_scores = torch.cat((neg_scores_sub, pos_score), dim=-1) + lsmax = -1 * F.log_softmax(all_scores, dim=-1) + pos_loss = lsmax[-1] + return pos_loss diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py new file mode 100755 index 0000000..d5d287a --- /dev/null +++ b/sgnlp/models/coherence_momentum/train.py @@ -0,0 +1,381 @@ +import pickle +import torch +import time +import os +import datetime +import random +from torch.utils.data import Dataset, DataLoader, SequentialSampler +from transformers import AdamW +from torch.optim.swa_utils import SWALR +from transformers import XLNetTokenizer +from modeling import MomentumModel + + +class MomentumDataset(Dataset): + def __init__(self, fname, model, device, datatype, negs, max_len): + self.fname = fname + self.device = device + self.data = pickle.load(open(fname, "rb")) + random.shuffle(self.data) + self.tokenizer = 
XLNetTokenizer.from_pretrained("xlnet-{}-cased".format(model)) + self.truncount = 0 + self.datatype = datatype + self.negs = negs + self.max_len = max_len + + def pad_ids(self, ids): + if len(ids) < self.max_len: + padding_size = self.max_len - len(ids) + padding = [ + self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token) + for i in range(padding_size) + ] + ids = ids + padding + else: + ids = ids[: self.max_len] + self.truncount += 1 + + return ids + + def prepare_data(self, idx): + pos_doc = self.data[idx]["pos"] + + if self.datatype == "single": + neg_docs = [self.data[idx]["neg"]] + elif self.datatype == "multiple": + neg_docs = self.data[idx]["negs"][: self.negs] + else: + raise Exception("Unexpected datatype") + + pos_span = pos_doc + pos_span = " ".join(pos_span) + pos_tokens = self.tokenizer.tokenize(pos_span) + pos_ids = self.tokenizer.convert_tokens_to_ids(pos_tokens) + pos_ids = self.pad_ids(pos_ids) + + neg_span_list = [] + for neg_doc in neg_docs: + neg_span = neg_doc + neg_span = " ".join(neg_span) + neg_tokens = self.tokenizer.tokenize(neg_span) + neg_ids = self.tokenizer.convert_tokens_to_ids(neg_tokens) + neg_ids = self.pad_ids(neg_ids) + neg_input = self.tokenizer.build_inputs_with_special_tokens(neg_ids) + + neg_span_list.append(torch.tensor(neg_input)) + + pos_input = self.tokenizer.build_inputs_with_special_tokens(pos_ids) + + return torch.tensor(pos_input).to(self.device), torch.stack(neg_span_list).to( + self.device + ) + + def get_slice(self, doc): + try: + end = random.choice(range(4, len(doc))) + return doc[:end] + except: + return doc + + def prepare_train_data(self, data_list, num_negs): + train_list = [] + for each_item in data_list: + train_list.append(list(self.prepare_each_item(each_item, num_negs))) + return train_list + + def prepare_each_item(self, train_data_item, num_negs): + pos_doc = train_data_item["pos"] + if self.datatype == "single": + neg_docs = [train_data_item["neg"]] + elif self.datatype == "multiple": + neg_docs = train_data_item["negs"][:num_negs] + + pos_span = pos_doc + pos_span = " ".join(pos_span) + pos_tokens = self.tokenizer.tokenize(pos_span) + pos_ids = self.tokenizer.convert_tokens_to_ids(pos_tokens) + pos_ids = self.pad_ids(pos_ids) + + pos_slice = " ".join(self.get_slice(pos_doc)) + slice_tokens = self.tokenizer.tokenize(pos_slice) + slice_ids = self.tokenizer.convert_tokens_to_ids(slice_tokens) + slice_ids = self.pad_ids(slice_ids) + + neg_span_list = [] + for neg_doc in neg_docs: + neg_span = neg_doc + neg_span = " ".join(neg_span) + neg_tokens = self.tokenizer.tokenize(neg_span) + neg_ids = self.tokenizer.convert_tokens_to_ids(neg_tokens) + neg_ids = self.pad_ids(neg_ids) + + neg_input = self.tokenizer.build_inputs_with_special_tokens(neg_ids) + + neg_span_list.append(torch.tensor(neg_input)) + + pos_input = self.tokenizer.build_inputs_with_special_tokens(pos_ids) + slice_input = self.tokenizer.build_inputs_with_special_tokens(slice_ids) + + pos_tensor = torch.tensor(pos_input).unsqueeze(0).to(self.device) + slice_tensor = torch.tensor(slice_input).unsqueeze(0).to(self.device) + neg_tensor_stack = torch.stack(neg_span_list).unsqueeze(0).to(self.device) + + return pos_tensor, slice_tensor, neg_tensor_stack + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.prepare_data(idx) + + +class LoadData: + def __init__( + self, fname, batch_size, model, device, datatype, negs, max_len, model_type + ): + self.fname = fname + self.batch_size = batch_size + self.dataset = 
MomentumDataset(fname, model, device, datatype, negs, max_len) + + def data_loader(self): + data_sampler = SequentialSampler(self.dataset) + loader = DataLoader( + dataset=self.dataset, sampler=data_sampler, batch_size=self.batch_size + ) + return loader + + +class TrainMomentumModel: + def save_model(self, output_dir, step, accuracy): + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + model_path = os.path.join( + output_dir, + "{}_seed-{}_bs-{}_lr-{}_step-{}_type-{}_acc-{}.mom".format( + self.desc, + self.seed, + self.batch_size, + self.learning_rate, + step, + self.model_size, + accuracy, + ), + ) + # torch.save(self.xlnet_model.state_dict(), model_path) + self.xlnet_model.save_pretrained(model_path) + + def __init__(self, args): + self.batch_size = args.batch_size + self.model_size = args.model_size + self.learning_rate = args.lr_start + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + self.train_file = args.train_file + self.dev_file = args.dev_file + if args.test_file: + self.test_file = args.test_file + else: + self.test_file = args.dev_file + self.negs = args.num_negs + self.rank_negs = args.num_rank_negs + self.train_steps = args.train_steps + self.margin = args.margin + self.desc = args.model_description + self.seed = args.seed + self.datatype = args.data_type + self.max_len = args.max_len + self.bestacc = 0.0 + self.model_type = args.coherence_model_type + + torch.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + + self.xlnet_model = MomentumModel(args) + self.xlnet_model.init_encoders() + + self.output_dir = args.output_dir + datetime.datetime.now().strftime( + "%Y%m%d%H%M%S" + ) + + self.xlnet_model = self.xlnet_model.to(self.device) + + self.optimizer = AdamW(self.xlnet_model.parameters(), lr=self.learning_rate) + self.scheduler = SWALR( + self.optimizer, + anneal_strategy="linear", + anneal_epochs=args.lr_anneal_epochs, + swa_lr=args.lr_end, + ) + self.total_loss = 0.0 + + self.eval_interval = args.eval_interval + + def get_ranked_negs(self, neg_scores): + ranked_idx = sorted( + range(len(neg_scores)), key=neg_scores.__getitem__, reverse=True + ) + hard_negs = ranked_idx[: self.negs] + return hard_negs + + def get_next_train_data(self, processed_exploration_data): + self.xlnet_model.eval() + + next_train_data = [] + with torch.no_grad(): + for i, each_data in enumerate(processed_exploration_data): + try: + pos_input, slice_input, neg_input = each_data + except Exception as e: + print(e) + continue + + pos_score, neg_scores = self.xlnet_model.eval_forward( + pos_input, neg_input + ) + pos_score = pos_score.to(torch.device("cpu")) + neg_scores = neg_scores.to(torch.device("cpu")) + + next_neg_idx = self.get_ranked_negs(neg_scores) + + if len(next_neg_idx) < self.negs: + continue + + neg_data_list = torch.stack( + [neg_input[0][x] for x in next_neg_idx] + ).unsqueeze(0) + next_train_data.append([pos_input, slice_input, neg_data_list]) + + return next_train_data + + def hard_negs_controller(self): + start = time.time() + train_data = MomentumDataset( + self.train_file, + self.model_size, + self.device, + self.datatype, + self.negs, + self.max_len, + ) + init_train_data = train_data.data[: self.train_steps] + total_iterations = len(train_data.data) // self.train_steps + + for iteration_index in range(total_iterations): + full_time = time.asctime(time.localtime(time.time())) + + print( + "ITERATION: {} TIME: {} LOSS: {}".format( + iteration_index, full_time, self.total_loss + ) + ) + self.total_loss = 0.0 + + if iteration_index 
== 0: + processed_train_data_list = train_data.prepare_train_data( + init_train_data, self.negs + ) + self.train_xlnet_model(processed_train_data_list, iteration_index) + next_train_data = [] + else: + start_index = iteration_index * self.train_steps + end_index = start_index + self.train_steps + + processed_explore_data_list = train_data.prepare_train_data( + train_data.data[start_index:end_index], self.rank_negs + ) + next_train_data = self.get_next_train_data(processed_explore_data_list) + self.train_xlnet_model(next_train_data, iteration_index) + + if (self.train_steps * (iteration_index + 1)) % self.eval_interval == 0: + self.scheduler.step() + self.eval_model( + self.dev_file, self.train_steps * (iteration_index + 1), start + ) + + self.eval_model(self.dev_file, self.train_steps * (iteration_index + 1), start) + + def train_xlnet_model(self, train_loader): + self.xlnet_model.train() + + for step, data in enumerate(train_loader): + + self.optimizer.zero_grad() + + try: + pos_input, slice_input, neg_input = data + except Exception as e: + print(e) + continue + + combined_loss = self.xlnet_model(pos_input, slice_input, neg_input) + combined_loss.backward() + + self.xlnet_model.update_momentum_encoder() + self.optimizer.step() + + self.total_loss += combined_loss.item() + + def eval_model(self, data_file, step, start): + + print(self.desc, self.seed, "EVAL START") + batch_size = self.batch_size + self.xlnet_model.eval() + test_data = LoadData( + data_file, + self.batch_size, + self.model_size, + self.device, + self.datatype, + self.negs, + self.max_len, + self.model_type, + ) + test_loader = test_data.data_loader() + + correct = 0.0 + total = 0.0 + + with torch.no_grad(): + for data in test_loader: + try: + pos_input, neg_inputs = data + except Exception as e: + print(e) + continue + + pos_score, neg_scores = self.xlnet_model.eval_forward( + pos_input, neg_inputs + ) + try: + max_neg_score = torch.max(neg_scores, -1).values + except: + max_neg_score = max(neg_scores) + + if pos_score > max_neg_score: + correct += 1.0 + total += 1.0 + + self.xlnet_model.train() + end = time.time() + full_time = time.asctime(time.localtime(end)) + acc = correct / total + if data_file == self.dev_file: + print( + "DEV EVAL Time: {} Elapsed: {} Steps: {} Acc: {}".format( + full_time, end - start, step, acc + ) + ) + if step > 0: + self.bestacc = acc + self.save_model(self.output_dir, step, acc) + elif data_file == self.test_file: + print( + "Please evaluate the test file separately with the best saved checkpoint." 
+ ) + print( + "TEST EVAL Time: {} Steps: {} Acc: {}".format( + full_time, end - start, step, acc + ) + ) + + return diff --git a/sgnlp/models/coherence_momentum/train_config.py b/sgnlp/models/coherence_momentum/train_config.py new file mode 100644 index 0000000..821bde3 --- /dev/null +++ b/sgnlp/models/coherence_momentum/train_config.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass, field + + +@dataclass +class CoherenceMomentumTrainConfig: + train_file: str = field(metadata={"help": "Train file path."}) + dev_file: str = field(metadata={"help": "Dev file path."}) + test_file: str = field(metadata={"help": "Test file path."}) + eval_file: str = field(metadata={"help": "Eval file path."}) + DATA_TYPE_CHOICES = ["multiple", "single"] + data_type: str = field( + metadata={"choices": DATA_TYPE_CHOICES, "help": "Data format."} + ) + + def __post_init__(self): + assert ( + self.data_type in self.DATA_TYPE_CHOICES + ), f"Data type must be one of {self.DATA_TYPE_CHOICES}" From 0606451c11ab51d7d3b830b0f72a90839053aa4d Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Wed, 3 Aug 2022 16:55:17 +0800 Subject: [PATCH 02/20] Correct minor grammar errors --- sgnlp/models/lsr/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sgnlp/models/lsr/train.py b/sgnlp/models/lsr/train.py index eb32349..3be2137 100644 --- a/sgnlp/models/lsr/train.py +++ b/sgnlp/models/lsr/train.py @@ -60,8 +60,8 @@ def compute(self, input_theta=None): Args: input_theta (`optional`, `float`): - Prediction threshold. Provide a value between 0 to 1 if you want to compute the precision and recall - for that specific threshold. Otherwise the optimal based on f1 score will be computed for you. + Prediction threshold. Provide a value between 0 and 1 if you want to compute the precision and recall + for that specific threshold. Otherwise, the optimal based on f1 score will be computed for you. 
""" # Sorts in descending order by predicted value self.test_result.sort(key=lambda x: x[1], reverse=True) From e7d38a0c6e8ff53d593de47442e00965dda27fe8 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 8 Aug 2022 11:36:43 +0800 Subject: [PATCH 03/20] [#57] Refactor model and train config --- sgnlp/models/coherence_momentum/config.py | 14 --- sgnlp/models/coherence_momentum/modeling.py | 12 +- sgnlp/models/coherence_momentum/train.py | 116 +++++++++--------- .../models/coherence_momentum/train_config.py | 8 ++ sgnlp/utils/train_config.py | 7 ++ 5 files changed, 79 insertions(+), 78 deletions(-) create mode 100644 sgnlp/utils/train_config.py diff --git a/sgnlp/models/coherence_momentum/config.py b/sgnlp/models/coherence_momentum/config.py index 9c1b8d3..ca4251a 100755 --- a/sgnlp/models/coherence_momentum/config.py +++ b/sgnlp/models/coherence_momentum/config.py @@ -7,17 +7,10 @@ class CoherenceConfig(PretrainedConfig): def __init__( self, model_size: str = "base", - lr_start: float = 5e-06, - lr_end: float = 1e-06, - lr_anneal_epochs: int = 50, - eval_interval: int = 1000, - seed: int = 100, - batch_size: int = 1, margin: float = 0.1, num_negs: int = 5, max_len: int = 600, num_rank_negs: int = 50, - train_steps: int = 200, momentum_coefficient: float = 0.9999999, queue_size: int = 1000, contrastive_loss_weight: float = 0.85, @@ -26,17 +19,10 @@ def __init__( super().__init__(**kwargs) self.model_size = model_size - self.lr_start = lr_start - self.lr_end = lr_end - self.lr_anneal_epochs = lr_anneal_epochs - self.eval_interval = eval_interval - self.seed = seed - self.batch_size = batch_size self.margin = margin self.num_negs = num_negs self.max_len = max_len self.num_rank_negs = num_rank_negs - self.train_steps = train_steps self.momentum_coefficient = momentum_coefficient self.queue_size = queue_size self.contrastive_loss_weight = contrastive_loss_weight diff --git a/sgnlp/models/coherence_momentum/modeling.py b/sgnlp/models/coherence_momentum/modeling.py index 54273dd..1e74734 100644 --- a/sgnlp/models/coherence_momentum/modeling.py +++ b/sgnlp/models/coherence_momentum/modeling.py @@ -31,12 +31,10 @@ def __init__(self, config): self.queue = [] self.queue_size = config.queue_size self.con_loss_weight = config.contrastive_loss_weight - self.negs = config.num_negs + self.num_negs = config.num_negs self.margin = config.margin self.cosim = nn.CosineSimilarity() - self.crossEntropy = nn.CrossEntropyLoss() - self.getTranspose = lambda x: torch.transpose(x, -2, -1) - self.subMargin = lambda z: z - config.margin + self.sub_margin = lambda z: z - config.margin self.conlinear = nn.Linear(hidden_size, 1) @@ -77,7 +75,7 @@ def forward(self, pos_doc, pos_slice, neg_docs): neg_moco_rep = list(map(self.get_momentum_rep, list(neg_docs))) if len(self.queue) >= self.queue_size: # global negative queue size - del self.queue[: self.negs] + del self.queue[: self.num_negs] self.queue.extend(neg_moco_rep[0]) pos_sim, neg_sims = self.get_cos_sim(pos_rep, pos_slice_rep) @@ -97,14 +95,14 @@ def eval_forward(self, pos_doc, neg_docs): return pos_score.detach(), neg_scores[0].detach() def sim_contrastive_loss(self, pos_sim, neg_sims): - neg_sims_sub = torch.stack(list(map(self.subMargin, neg_sims))).view(-1) + neg_sims_sub = torch.stack(list(map(self.sub_margin, neg_sims))).view(-1) all_sims = torch.cat((neg_sims_sub, pos_sim), dim=-1) lsmax = -1 * F.log_softmax(all_sims, dim=-1) loss = lsmax[-1] return loss def contrastive_loss(self, pos_score, neg_scores): - neg_scores_sub = 
torch.stack(list(map(self.subMargin, neg_scores))) + neg_scores_sub = torch.stack(list(map(self.sub_margin, neg_scores))) all_scores = torch.cat((neg_scores_sub, pos_score), dim=-1) lsmax = -1 * F.log_softmax(all_scores, dim=-1) pos_loss = lsmax[-1] diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index d5d287a..8380247 100755 --- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -1,3 +1,4 @@ +import argparse import pickle import torch import time @@ -9,6 +10,9 @@ from torch.optim.swa_utils import SWALR from transformers import XLNetTokenizer from modeling import MomentumModel +from sgnlp.models.coherence_momentum.config import CoherenceConfig +from sgnlp.models.coherence_momentum.train_config import CoherenceMomentumTrainConfig +from sgnlp.utils.train_config import load_train_config class MomentumDataset(Dataset): @@ -130,9 +134,7 @@ def __getitem__(self, idx): class LoadData: - def __init__( - self, fname, batch_size, model, device, datatype, negs, max_len, model_type - ): + def __init__(self, fname, batch_size, model, device, datatype, negs, max_len): self.fname = fname self.batch_size = batch_size self.dataset = MomentumDataset(fname, model, device, datatype, negs, max_len) @@ -151,70 +153,62 @@ def save_model(self, output_dir, step, accuracy): os.mkdir(output_dir) model_path = os.path.join( output_dir, - "{}_seed-{}_bs-{}_lr-{}_step-{}_type-{}_acc-{}.mom".format( - self.desc, - self.seed, - self.batch_size, - self.learning_rate, - step, - self.model_size, - accuracy, - ), + f"momentum_seed-{self.seed}_bs-{self.batch_size}_lr-{self.train_config.lr_start}_step-{step}_type-{self.model_size}_acc-{accuracy}", ) - # torch.save(self.xlnet_model.state_dict(), model_path) self.xlnet_model.save_pretrained(model_path) - def __init__(self, args): - self.batch_size = args.batch_size - self.model_size = args.model_size - self.learning_rate = args.lr_start - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + def __init__(self, model_config_path, train_config_path): + self.model_config = CoherenceConfig.from_pretrained(model_config_path) + self.train_config = load_train_config( + CoherenceMomentumTrainConfig, train_config_path + ) + + self.model_size = self.model_config.model_size + self.num_negs = self.model_config.num_negs + self.max_len = self.model_config.max_len + self.rank_negs = self.model_config.num_rank_negs - self.train_file = args.train_file - self.dev_file = args.dev_file - if args.test_file: - self.test_file = args.test_file + self.dev_file = self.train_config.dev_file + if self.train_config.test_file: + self.test_file = self.train_config.test_file else: - self.test_file = args.dev_file - self.negs = args.num_negs - self.rank_negs = args.num_rank_negs - self.train_steps = args.train_steps - self.margin = args.margin - self.desc = args.model_description - self.seed = args.seed - self.datatype = args.data_type - self.max_len = args.max_len - self.bestacc = 0.0 - self.model_type = args.coherence_model_type + self.test_file = self.train_config.dev_file + self.output_dir = ( + self.train_config.output_dir + + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + ) + self.datatype = self.train_config.data_type + self.eval_interval = self.train_config.eval_interval + self.seed = self.train_config.seed + self.batch_size = self.train_config.batch_size + self.train_steps = self.train_config.train_steps - torch.manual_seed(args.seed) - torch.cuda.manual_seed_all(args.seed) + self.device 
= torch.device("cuda" if torch.cuda.is_available() else "cpu") + torch.manual_seed(self.seed) + torch.cuda.manual_seed_all(self.seed) - self.xlnet_model = MomentumModel(args) + self.xlnet_model = MomentumModel(self.model_config) self.xlnet_model.init_encoders() - - self.output_dir = args.output_dir + datetime.datetime.now().strftime( - "%Y%m%d%H%M%S" - ) - self.xlnet_model = self.xlnet_model.to(self.device) - self.optimizer = AdamW(self.xlnet_model.parameters(), lr=self.learning_rate) + self.optimizer = AdamW( + self.xlnet_model.parameters(), lr=self.train_config.lr_start + ) self.scheduler = SWALR( self.optimizer, anneal_strategy="linear", - anneal_epochs=args.lr_anneal_epochs, - swa_lr=args.lr_end, + anneal_epochs=self.train_config.lr_anneal_epochs, + swa_lr=self.train_config.lr_end, ) - self.total_loss = 0.0 - self.eval_interval = args.eval_interval + self.total_loss = 0.0 + self.bestacc = 0.0 def get_ranked_negs(self, neg_scores): ranked_idx = sorted( range(len(neg_scores)), key=neg_scores.__getitem__, reverse=True ) - hard_negs = ranked_idx[: self.negs] + hard_negs = ranked_idx[: self.num_negs] return hard_negs def get_next_train_data(self, processed_exploration_data): @@ -237,7 +231,7 @@ def get_next_train_data(self, processed_exploration_data): next_neg_idx = self.get_ranked_negs(neg_scores) - if len(next_neg_idx) < self.negs: + if len(next_neg_idx) < self.num_negs: continue neg_data_list = torch.stack( @@ -250,11 +244,11 @@ def get_next_train_data(self, processed_exploration_data): def hard_negs_controller(self): start = time.time() train_data = MomentumDataset( - self.train_file, + self.train_config.train_file, self.model_size, self.device, self.datatype, - self.negs, + self.num_negs, self.max_len, ) init_train_data = train_data.data[: self.train_steps] @@ -272,10 +266,9 @@ def hard_negs_controller(self): if iteration_index == 0: processed_train_data_list = train_data.prepare_train_data( - init_train_data, self.negs + init_train_data, self.num_negs ) self.train_xlnet_model(processed_train_data_list, iteration_index) - next_train_data = [] else: start_index = iteration_index * self.train_steps end_index = start_index + self.train_steps @@ -316,9 +309,6 @@ def train_xlnet_model(self, train_loader): self.total_loss += combined_loss.item() def eval_model(self, data_file, step, start): - - print(self.desc, self.seed, "EVAL START") - batch_size = self.batch_size self.xlnet_model.eval() test_data = LoadData( data_file, @@ -326,9 +316,8 @@ def eval_model(self, data_file, step, start): self.model_size, self.device, self.datatype, - self.negs, + self.num_negs, self.max_len, - self.model_type, ) test_loader = test_data.data_loader() @@ -379,3 +368,16 @@ def eval_model(self, data_file, step, start): ) return + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--train_config_file", type=str) + parser.add_argument("--model_config_file", type=str) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + trainer = TrainMomentumModel(args.model_config_file, args.train_config_file) diff --git a/sgnlp/models/coherence_momentum/train_config.py b/sgnlp/models/coherence_momentum/train_config.py index 821bde3..f60fea0 100644 --- a/sgnlp/models/coherence_momentum/train_config.py +++ b/sgnlp/models/coherence_momentum/train_config.py @@ -7,10 +7,18 @@ class CoherenceMomentumTrainConfig: dev_file: str = field(metadata={"help": "Dev file path."}) test_file: str = field(metadata={"help": "Test file path."}) eval_file: str = 
field(metadata={"help": "Eval file path."}) + output_dir: str = field(metadata={"help": "Output directory."}) DATA_TYPE_CHOICES = ["multiple", "single"] data_type: str = field( metadata={"choices": DATA_TYPE_CHOICES, "help": "Data format."} ) + lr_start: float = field(default=5e-06) + lr_end: float = field(default=1e-06) + lr_anneal_epochs: int = field(default=50) + eval_interval: int = field(default=1000) + seed: int = field(default=100) + batch_size: int = field(default=1) + train_steps: int = field(default=200) def __post_init__(self): assert ( diff --git a/sgnlp/utils/train_config.py b/sgnlp/utils/train_config.py new file mode 100644 index 0000000..e53f311 --- /dev/null +++ b/sgnlp/utils/train_config.py @@ -0,0 +1,7 @@ +import json + + +def load_train_config(config_class, json_file_path): + with open(json_file_path, "r") as f: + json_file = json.load(f) + return config_class(**json_file) From 304e3999d9e38f5cef55ac2a3aef27a8c461dd8d Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 8 Aug 2022 16:47:25 +0800 Subject: [PATCH 04/20] [#57] Add default config files --- sgnlp/models/coherence_momentum/model_config.json | 10 ++++++++++ sgnlp/models/coherence_momentum/train_config.json | 15 +++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 sgnlp/models/coherence_momentum/model_config.json create mode 100644 sgnlp/models/coherence_momentum/train_config.json diff --git a/sgnlp/models/coherence_momentum/model_config.json b/sgnlp/models/coherence_momentum/model_config.json new file mode 100644 index 0000000..0b44fbe --- /dev/null +++ b/sgnlp/models/coherence_momentum/model_config.json @@ -0,0 +1,10 @@ +{ + "contrastive_loss_weight": 0.85, + "margin": 0.1, + "max_len": 600, + "model_size": "base", + "momentum_coefficient": 0.9999999, + "num_negs": 5, + "num_rank_negs": 50, + "queue_size": 1000 +} \ No newline at end of file diff --git a/sgnlp/models/coherence_momentum/train_config.json b/sgnlp/models/coherence_momentum/train_config.json new file mode 100644 index 0000000..4a4f539 --- /dev/null +++ b/sgnlp/models/coherence_momentum/train_config.json @@ -0,0 +1,15 @@ +{ + "batch_size": 1, + "data_type": "multiple", + "dev_file": "permuted_wsj_dev_max-negs-100_size-4K", + "eval_file": "", + "eval_interval": 1000, + "lr_anneal_epochs": 50, + "lr_end": 1e-06, + "lr_start": 5e-06, + "output_dir": "outputs", + "seed": 100, + "test_file": "", + "train_file": "permuted_wsj_train_max-negs-100_size-46K", + "train_steps": 200 +} \ No newline at end of file From 50934c98bf272650fc2bb7ad2b9a147f0afd1138 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 16:47:24 +0800 Subject: [PATCH 05/20] [#57] Minor change to import --- sgnlp/models/coherence_momentum/train.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index 8380247..5274322 100755 --- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -1,17 +1,18 @@ import argparse import pickle -import torch import time import os import datetime import random + +import torch from torch.utils.data import Dataset, DataLoader, SequentialSampler -from transformers import AdamW +from transformers import AdamW, XLNetTokenizer from torch.optim.swa_utils import SWALR -from transformers import XLNetTokenizer -from modeling import MomentumModel -from sgnlp.models.coherence_momentum.config import CoherenceConfig -from sgnlp.models.coherence_momentum.train_config import 
CoherenceMomentumTrainConfig + +from .modeling import MomentumModel +from .config import CoherenceConfig +from .train_config import CoherenceMomentumTrainConfig from sgnlp.utils.train_config import load_train_config From 9920647dab55ec469b5baff00774d5060579c29b Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 16:48:35 +0800 Subject: [PATCH 06/20] [#57] Add files for running experiment on polyaxon --- .../model-training.Dockerfile | 15 +++++ .../polyaxon-experiment-nomig.yml | 62 +++++++++++++++++++ polyaxon/coherence_momentum/requirements.txt | 21 +++++++ 3 files changed, 98 insertions(+) create mode 100644 polyaxon/coherence_momentum/model-training.Dockerfile create mode 100644 polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml create mode 100644 polyaxon/coherence_momentum/requirements.txt diff --git a/polyaxon/coherence_momentum/model-training.Dockerfile b/polyaxon/coherence_momentum/model-training.Dockerfile new file mode 100644 index 0000000..44f96d4 --- /dev/null +++ b/polyaxon/coherence_momentum/model-training.Dockerfile @@ -0,0 +1,15 @@ +FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-devel + +ARG REPO_DIR="." +ARG PROJECT_USER="aisg" +ARG HOME_DIR="/home/$PROJECT_USER" + +COPY $REPO_DIR nlp-hub-gcp +WORKDIR $REPO_DIR/nlp-hub-gcp + +RUN pip install -r polyaxon/coherence_momentum/requirements.txt +RUN groupadd -g 2222 $PROJECT_USER && useradd -u 2222 -g 2222 -m $PROJECT_USER +RUN chown -R 2222:2222 $HOME_DIR && \ + rm /bin/sh && ln -s /bin/bash /bin/sh +USER 2222 + diff --git a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml new file mode 100644 index 0000000..9c8a4ba --- /dev/null +++ b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml @@ -0,0 +1,62 @@ +version: 1.1 +kind: component +name: train-model +description: Job for training a predictive model using GPU. +tags: [model_training] +inputs: + - name: SA_CRED_PATH + description: Path to credential file for GCP service account. + isOptional: true + type: str + value: /var/secret/cloud.google.com/gcp-service-account.json + toEnv: GOOGLE_APPLICATION_CREDENTIALS + - name: WORKING_DIR + description: The working directory for the job to run in. + isOptional: true + value: /home/aisg/nlp-hub-gcp + type: str + - name: TRAIN_CONFIG_FILE_PATH + description: Config file path. + type: str + isOptional: false + - name: MODEL_CONFIG_FILE_PATH + description: Config file path. 
+ type: str + isOptional: false +run: + kind: job + connections: [fstore-pvc] + environment: + imagePullSecrets: ["gcp-imagepullsecrets"] + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu + operator: Equal + value: present + - effect: NoSchedule + key: nomig + operator: Equal + value: present + volumes: + - name: gcp-service-account + secret: + secretName: "gcp-sa-credentials" + container: + image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.3 + imagePullPolicy: IfNotPresent + workingDir: "{{ WORKING_DIR }}" + command: ["/bin/bash","-c"] + args: [ + "python -m sgnlp.models.coherence_momentum.train \ + --train_config_file {{ TRAIN_CONFIG_FILE_PATH }} \ + --model_config_file {{ MODEL_CONFIG_FILE_PATH }} + " + ] + resources: + requests: + nvidia.com/gpu: 1 + limits: + nvidia.com/gpu: 1 + volumeMounts: + - name: gcp-service-account + mountPath: /var/secret/cloud.google.com diff --git a/polyaxon/coherence_momentum/requirements.txt b/polyaxon/coherence_momentum/requirements.txt new file mode 100644 index 0000000..13ca7f6 --- /dev/null +++ b/polyaxon/coherence_momentum/requirements.txt @@ -0,0 +1,21 @@ +pandas==1.1.5 +mlflow==1.22.0 +protobuf==3.20.* +pylint==2.6.0 +pytest-cov==2.10.1 +pyyaml==5.4.1 +python-json-logger==2.0.2 +polyaxon==1.11.3 +google-cloud-storage==1.43.0 +hydra-core==1.1.1 +hydra-optuna-sweeper==1.1.1 +optuna==2.10.0 +fastapi==0.70.1 +uvicorn[standard]==0.14.0 +gunicorn==20.1.0 +nltk +scikit-learn +torchtext +transformers +sentencepiece +-e . \ No newline at end of file From a5108d27a003618f7f90b7893fe163918fbce9af Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 17:15:34 +0800 Subject: [PATCH 07/20] [#57] Remove unused attribute --- sgnlp/models/coherence_momentum/modeling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sgnlp/models/coherence_momentum/modeling.py b/sgnlp/models/coherence_momentum/modeling.py index 1e74734..8b3802e 100644 --- a/sgnlp/models/coherence_momentum/modeling.py +++ b/sgnlp/models/coherence_momentum/modeling.py @@ -15,7 +15,6 @@ class MomentumModel(PreTrainedModel): def __init__(self, config): super().__init__(config) - self.batch_size = config.batch_size self.momentum_coefficient = config.momentum_coefficient self.encoder_name = f"xlnet-{config.model_size}-cased" From 4a23a1cecc29846e6fd21bc8db6ca1a4dcf3d693 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 17:16:01 +0800 Subject: [PATCH 08/20] [#57] Update build tag --- polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml index 9c8a4ba..9cf5302 100644 --- a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml +++ b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml @@ -42,7 +42,7 @@ run: secret: secretName: "gcp-sa-credentials" container: - image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.3 + image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.4 imagePullPolicy: IfNotPresent workingDir: "{{ WORKING_DIR }}" command: ["/bin/bash","-c"] From 6cccc084ff1078ca012b285ca2b77c52d07d6eeb Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 17:29:20 +0800 Subject: [PATCH 09/20] [#57] Add line of code to start training --- sgnlp/models/coherence_momentum/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index 5274322..debc15e 100755 
--- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -382,3 +382,4 @@ def parse_args(): if __name__ == "__main__": args = parse_args() trainer = TrainMomentumModel(args.model_config_file, args.train_config_file) + trainer.hard_negs_controller() From 1aa7b2b2c0e05b1b039952317bb965db662e1412 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 17:29:36 +0800 Subject: [PATCH 10/20] [#57] Update build tag --- polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml index 9cf5302..0008b19 100644 --- a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml +++ b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml @@ -42,7 +42,7 @@ run: secret: secretName: "gcp-sa-credentials" container: - image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.4 + image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.5 imagePullPolicy: IfNotPresent workingDir: "{{ WORKING_DIR }}" command: ["/bin/bash","-c"] From becfc11cdd3a0808ad6186cb6fb9c167c94a2241 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 17:39:21 +0800 Subject: [PATCH 11/20] [#57] Remove unused method parameter --- sgnlp/models/coherence_momentum/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index debc15e..5b3f57d 100755 --- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -269,7 +269,7 @@ def hard_negs_controller(self): processed_train_data_list = train_data.prepare_train_data( init_train_data, self.num_negs ) - self.train_xlnet_model(processed_train_data_list, iteration_index) + self.train_xlnet_model(processed_train_data_list) else: start_index = iteration_index * self.train_steps end_index = start_index + self.train_steps @@ -278,7 +278,7 @@ def hard_negs_controller(self): train_data.data[start_index:end_index], self.rank_negs ) next_train_data = self.get_next_train_data(processed_explore_data_list) - self.train_xlnet_model(next_train_data, iteration_index) + self.train_xlnet_model(next_train_data) if (self.train_steps * (iteration_index + 1)) % self.eval_interval == 0: self.scheduler.step() From f6a3ead90cc41037bb344c400e3a654810f9e28f Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 15 Aug 2022 17:39:38 +0800 Subject: [PATCH 12/20] [#57] Update build tag --- polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml index 0008b19..84a62a1 100644 --- a/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml +++ b/polyaxon/coherence_momentum/polyaxon-experiment-nomig.yml @@ -42,7 +42,7 @@ run: secret: secretName: "gcp-sa-credentials" container: - image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.5 + image: asia.gcr.io/nlp-hub/coherence-paradigm-refactored:0.6 imagePullPolicy: IfNotPresent workingDir: "{{ WORKING_DIR }}" command: ["/bin/bash","-c"] From baddc59ce7496c2de9addebb60b92f04891e5120 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Tue, 16 Aug 2022 12:10:41 +0800 Subject: [PATCH 13/20] [#57] Add functionality to save best n checkpoints --- sgnlp/models/coherence_momentum/train.py | 37 +++++++++++++------ 
.../models/coherence_momentum/train_config.py | 1 + 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index 5b3f57d..39e570a 100755 --- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -4,6 +4,7 @@ import os import datetime import random +import shutil import torch from torch.utils.data import Dataset, DataLoader, SequentialSampler @@ -149,15 +150,6 @@ def data_loader(self): class TrainMomentumModel: - def save_model(self, output_dir, step, accuracy): - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - model_path = os.path.join( - output_dir, - f"momentum_seed-{self.seed}_bs-{self.batch_size}_lr-{self.train_config.lr_start}_step-{step}_type-{self.model_size}_acc-{accuracy}", - ) - self.xlnet_model.save_pretrained(model_path) - def __init__(self, model_config_path, train_config_path): self.model_config = CoherenceConfig.from_pretrained(model_config_path) self.train_config = load_train_config( @@ -175,14 +167,17 @@ def __init__(self, model_config_path, train_config_path): else: self.test_file = self.train_config.dev_file self.output_dir = ( - self.train_config.output_dir - + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + self.train_config.output_dir + + "-" + + datetime.datetime.now().strftime("%Y%m%d%H%M%S") ) self.datatype = self.train_config.data_type self.eval_interval = self.train_config.eval_interval self.seed = self.train_config.seed self.batch_size = self.train_config.batch_size self.train_steps = self.train_config.train_steps + self.num_checkpoints = self.train_config.num_checkpoints + self.best_checkpoints = [] # List of tuples of (accuracy, path) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") torch.manual_seed(self.seed) @@ -370,6 +365,26 @@ def eval_model(self, data_file, step, start): return + def save_model(self, output_dir, step, accuracy): + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + model_path = os.path.join( + output_dir, + f"momentum_seed-{self.seed}_bs-{self.batch_size}_lr-{self.train_config.lr_start}" + f"_step-{step}_type-{self.model_size}_acc-{accuracy:.3f}", + ) + + if len(self.best_checkpoints) == 0: + self.xlnet_model.save_pretrained(model_path) + self.best_checkpoints.append((accuracy, model_path)) + elif accuracy > self.best_checkpoints[-1][0]: + self.xlnet_model.save_pretrained(model_path) + self.best_checkpoints.append((accuracy, model_path)) + self.best_checkpoints.sort(key=lambda x: x[0], reverse=True) + if len(self.best_checkpoints) > self.num_checkpoints: + _, dir_to_delete = self.best_checkpoints.pop() + shutil.rmtree(dir_to_delete, ignore_errors=True) + def parse_args(): parser = argparse.ArgumentParser() diff --git a/sgnlp/models/coherence_momentum/train_config.py b/sgnlp/models/coherence_momentum/train_config.py index f60fea0..ee79f75 100644 --- a/sgnlp/models/coherence_momentum/train_config.py +++ b/sgnlp/models/coherence_momentum/train_config.py @@ -19,6 +19,7 @@ class CoherenceMomentumTrainConfig: seed: int = field(default=100) batch_size: int = field(default=1) train_steps: int = field(default=200) + num_checkpoints: int = field(default=5, metadata={"help": "Number of best checkpoints to save"}) def __post_init__(self): assert ( From 0c5fa4b16b64da154df154d40f31d6f10a54fa37 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Wed, 31 Aug 2022 22:23:50 +0800 Subject: [PATCH 14/20] [#57] Rename model and config to be consistent --- 
sgnlp/models/coherence_momentum/__init__.py | 2 ++ sgnlp/models/coherence_momentum/config.py | 4 +--- sgnlp/models/coherence_momentum/modeling.py | 10 ++++----- sgnlp/models/coherence_momentum/train.py | 22 ++++++++++--------- .../models/coherence_momentum/train_config.py | 4 +++- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/sgnlp/models/coherence_momentum/__init__.py b/sgnlp/models/coherence_momentum/__init__.py index e69de29..cc96117 100644 --- a/sgnlp/models/coherence_momentum/__init__.py +++ b/sgnlp/models/coherence_momentum/__init__.py @@ -0,0 +1,2 @@ +from .modeling import CoherenceMomentumModel +from .config import CoherenceMomentumConfig diff --git a/sgnlp/models/coherence_momentum/config.py b/sgnlp/models/coherence_momentum/config.py index ca4251a..06f63d4 100755 --- a/sgnlp/models/coherence_momentum/config.py +++ b/sgnlp/models/coherence_momentum/config.py @@ -1,9 +1,7 @@ from transformers import PretrainedConfig -class CoherenceConfig(PretrainedConfig): - model_type = "general_coherence_model" - +class CoherenceMomentumConfig(PretrainedConfig): def __init__( self, model_size: str = "base", diff --git a/sgnlp/models/coherence_momentum/modeling.py b/sgnlp/models/coherence_momentum/modeling.py index 8b3802e..575d146 100644 --- a/sgnlp/models/coherence_momentum/modeling.py +++ b/sgnlp/models/coherence_momentum/modeling.py @@ -3,15 +3,15 @@ import torch.nn.functional as F from transformers import XLNetModel, XLNetConfig from transformers import PreTrainedModel -from .config import CoherenceConfig +from .config import CoherenceMomentumConfig -class MomentumPreTrainedModel(PreTrainedModel): - config_class = CoherenceConfig - base_model_prefix = "momentum" +class CoherenceMomentumPreTrainedModel(PreTrainedModel): + config_class = CoherenceMomentumConfig + base_model_prefix = "coherence_momentum" -class MomentumModel(PreTrainedModel): +class CoherenceMomentumModel(CoherenceMomentumPreTrainedModel): def __init__(self, config): super().__init__(config) diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index 39e570a..3b2d02f 100755 --- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -11,13 +11,13 @@ from transformers import AdamW, XLNetTokenizer from torch.optim.swa_utils import SWALR -from .modeling import MomentumModel -from .config import CoherenceConfig +from .modeling import CoherenceMomentumModel +from .config import CoherenceMomentumConfig from .train_config import CoherenceMomentumTrainConfig from sgnlp.utils.train_config import load_train_config -class MomentumDataset(Dataset): +class CoherenceMomentumDataset(Dataset): def __init__(self, fname, model, device, datatype, negs, max_len): self.fname = fname self.device = device @@ -139,7 +139,9 @@ class LoadData: def __init__(self, fname, batch_size, model, device, datatype, negs, max_len): self.fname = fname self.batch_size = batch_size - self.dataset = MomentumDataset(fname, model, device, datatype, negs, max_len) + self.dataset = CoherenceMomentumDataset( + fname, model, device, datatype, negs, max_len + ) def data_loader(self): data_sampler = SequentialSampler(self.dataset) @@ -151,7 +153,7 @@ def data_loader(self): class TrainMomentumModel: def __init__(self, model_config_path, train_config_path): - self.model_config = CoherenceConfig.from_pretrained(model_config_path) + self.model_config = CoherenceMomentumConfig.from_pretrained(model_config_path) self.train_config = load_train_config( CoherenceMomentumTrainConfig, 
train_config_path ) @@ -167,9 +169,9 @@ def __init__(self, model_config_path, train_config_path): else: self.test_file = self.train_config.dev_file self.output_dir = ( - self.train_config.output_dir - + "-" - + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + self.train_config.output_dir + + "-" + + datetime.datetime.now().strftime("%Y%m%d%H%M%S") ) self.datatype = self.train_config.data_type self.eval_interval = self.train_config.eval_interval @@ -183,7 +185,7 @@ def __init__(self, model_config_path, train_config_path): torch.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) - self.xlnet_model = MomentumModel(self.model_config) + self.xlnet_model = CoherenceMomentumModel(self.model_config) self.xlnet_model.init_encoders() self.xlnet_model = self.xlnet_model.to(self.device) @@ -239,7 +241,7 @@ def get_next_train_data(self, processed_exploration_data): def hard_negs_controller(self): start = time.time() - train_data = MomentumDataset( + train_data = CoherenceMomentumDataset( self.train_config.train_file, self.model_size, self.device, diff --git a/sgnlp/models/coherence_momentum/train_config.py b/sgnlp/models/coherence_momentum/train_config.py index ee79f75..c727962 100644 --- a/sgnlp/models/coherence_momentum/train_config.py +++ b/sgnlp/models/coherence_momentum/train_config.py @@ -19,7 +19,9 @@ class CoherenceMomentumTrainConfig: seed: int = field(default=100) batch_size: int = field(default=1) train_steps: int = field(default=200) - num_checkpoints: int = field(default=5, metadata={"help": "Number of best checkpoints to save"}) + num_checkpoints: int = field( + default=5, metadata={"help": "Number of best checkpoints to save"} + ) def __post_init__(self): assert ( From a54c7ed6d4c4ac8fa04d21cbd56079f32a256c56 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Wed, 31 Aug 2022 23:26:10 +0800 Subject: [PATCH 15/20] [#57] Add preprocessor --- sgnlp/models/coherence_momentum/__init__.py | 1 + sgnlp/models/coherence_momentum/preprocess.py | 48 +++++++++++++++++++ sgnlp/models/coherence_momentum/train.py | 14 +++--- 3 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 sgnlp/models/coherence_momentum/preprocess.py diff --git a/sgnlp/models/coherence_momentum/__init__.py b/sgnlp/models/coherence_momentum/__init__.py index cc96117..9599aa0 100644 --- a/sgnlp/models/coherence_momentum/__init__.py +++ b/sgnlp/models/coherence_momentum/__init__.py @@ -1,2 +1,3 @@ from .modeling import CoherenceMomentumModel from .config import CoherenceMomentumConfig +from .preprocess import CoherenceMomentumPreprocessor diff --git a/sgnlp/models/coherence_momentum/preprocess.py b/sgnlp/models/coherence_momentum/preprocess.py new file mode 100644 index 0000000..8d6acb5 --- /dev/null +++ b/sgnlp/models/coherence_momentum/preprocess.py @@ -0,0 +1,48 @@ +from typing import List + +import torch +from transformers import XLNetTokenizer + + +class CoherenceMomentumPreprocessor: + def __init__(self, model_size, max_len, tokenizer=None): + if tokenizer is not None: + self.tokenizer = tokenizer + else: + self.tokenizer = XLNetTokenizer.from_pretrained(f"xlnet-{model_size}-cased") + + self.max_len = max_len + + def __call__(self, texts: List[str]): + """ + + Args: + texts (List[str]): List of input texts + + Returns: + Dict[str, str]: Returns a dictionary with the following key-values: + "tokenized_texts": (torch.tensor) Tensors of tokenized ids of input texts + """ + + result = [] + for text in texts: + tokens = self.tokenizer.tokenize(text) + ids = self.tokenizer.convert_tokens_to_ids(tokens) + 
ids = self.pad_ids(ids) + ids = self.tokenizer.build_inputs_with_special_tokens(ids) + result.append(torch.tensor(ids)) + + return {"tokenized_texts": torch.stack(result)} + + def pad_ids(self, ids): + if len(ids) < self.max_len: + padding_size = self.max_len - len(ids) + padding = [ + self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token) + for _ in range(padding_size) + ] + ids = ids + padding + else: + ids = ids[: self.max_len] + + return ids diff --git a/sgnlp/models/coherence_momentum/train.py b/sgnlp/models/coherence_momentum/train.py index 3b2d02f..ba3284e 100755 --- a/sgnlp/models/coherence_momentum/train.py +++ b/sgnlp/models/coherence_momentum/train.py @@ -18,13 +18,14 @@ class CoherenceMomentumDataset(Dataset): - def __init__(self, fname, model, device, datatype, negs, max_len): + def __init__(self, fname, model_size, device, datatype, negs, max_len): self.fname = fname self.device = device self.data = pickle.load(open(fname, "rb")) random.shuffle(self.data) - self.tokenizer = XLNetTokenizer.from_pretrained("xlnet-{}-cased".format(model)) - self.truncount = 0 + self.tokenizer = XLNetTokenizer.from_pretrained( + "xlnet-{}-cased".format(model_size) + ) self.datatype = datatype self.negs = negs self.max_len = max_len @@ -34,12 +35,11 @@ def pad_ids(self, ids): padding_size = self.max_len - len(ids) padding = [ self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token) - for i in range(padding_size) + for _ in range(padding_size) ] ids = ids + padding else: ids = ids[: self.max_len] - self.truncount += 1 return ids @@ -136,11 +136,11 @@ def __getitem__(self, idx): class LoadData: - def __init__(self, fname, batch_size, model, device, datatype, negs, max_len): + def __init__(self, fname, batch_size, model_size, device, datatype, negs, max_len): self.fname = fname self.batch_size = batch_size self.dataset = CoherenceMomentumDataset( - fname, model, device, datatype, negs, max_len + fname, model_size, device, datatype, negs, max_len ) def data_loader(self): From 0408670b8cfd2888f48e62ae02ce3388f3de62bf Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Mon, 19 Sep 2022 16:26:22 +0800 Subject: [PATCH 16/20] [#57] Add files for demo api --- demo_api/coherence_momentum/api.py | 46 +++++++++++++++++++ demo_api/coherence_momentum/dev.Dockerfile | 14 ++++++ .../coherence_momentum/download_pretrained.py | 9 ++++ .../model_card/coherence_momentum.json | 36 +++++++++++++++ .../coherence_momentum/requirements_dev.txt | 3 ++ demo_api/coherence_momentum/usage.py | 33 +++++++++++++ 6 files changed, 141 insertions(+) create mode 100644 demo_api/coherence_momentum/api.py create mode 100644 demo_api/coherence_momentum/dev.Dockerfile create mode 100644 demo_api/coherence_momentum/download_pretrained.py create mode 100644 demo_api/coherence_momentum/model_card/coherence_momentum.json create mode 100644 demo_api/coherence_momentum/requirements_dev.txt create mode 100644 demo_api/coherence_momentum/usage.py diff --git a/demo_api/coherence_momentum/api.py b/demo_api/coherence_momentum/api.py new file mode 100644 index 0000000..b510ca1 --- /dev/null +++ b/demo_api/coherence_momentum/api.py @@ -0,0 +1,46 @@ +from flask import request + +from demo_api.common import create_api +from sgnlp.models.coherence_momentum import ( + CoherenceMomentumModel, + CoherenceMomentumConfig, + CoherenceMomentumPreprocessor +) + +app = create_api(app_name=__name__, model_card_path="model_card/coherence_momentum.json") + +# Load processors and models +config = CoherenceMomentumConfig.from_pretrained( + 
"https://storage.googleapis.com/sgnlp/models/coherence_momentum/config.json" +) +model = CoherenceMomentumModel.from_pretrained( + "https://storage.googleapis.com/sgnlp/models/coherence_momentum/pytorch_model.bin", + config=config +) + +preprocessor = CoherenceMomentumPreprocessor(config.model_size, config.max_len) + +app.logger.info("Model initialization complete") + + +@app.route("/predict", methods=["POST"]) +def predict(): + req_body = request.get_json() + + text1 = req_body["text1"] + text2 = req_body["text2"] + + text1_tensor = preprocessor([text1]) + text2_tensor = preprocessor([text2]) + + text1_score = model.get_main_score(text1_tensor["tokenized_texts"]).item() + text2_score = model.get_main_score(text2_tensor["tokenized_texts"]).item() + + return { + "text1_score": text1_score, + "text2_score": text2_score + } + + +if __name__ == "__main__": + app.run() diff --git a/demo_api/coherence_momentum/dev.Dockerfile b/demo_api/coherence_momentum/dev.Dockerfile new file mode 100644 index 0000000..2b58fe8 --- /dev/null +++ b/demo_api/coherence_momentum/dev.Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.8-buster + +COPY ./demo_api /demo_api +COPY ./sgnlp /sgnlp +COPY ./setup.py /setup.py +COPY ./README.md /README.md + +RUN pip install -r /demo_api/coherence_momentum/requirements_dev.txt + +WORKDIR /demo_api/coherence_momentum + +RUN python -m download_pretrained + +CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py \ No newline at end of file diff --git a/demo_api/coherence_momentum/download_pretrained.py b/demo_api/coherence_momentum/download_pretrained.py new file mode 100644 index 0000000..94532d9 --- /dev/null +++ b/demo_api/coherence_momentum/download_pretrained.py @@ -0,0 +1,9 @@ +from sgnlp.models.coherence_momentum import CoherenceMomentumModel, CoherenceMomentumConfig + +config = CoherenceMomentumConfig.from_pretrained( + "https://storage.googleapis.com/sgnlp/models/coherence_momentum/config.json" +) +model = CoherenceMomentumModel.from_pretrained( + "https://storage.googleapis.com/sgnlp/models/coherence_momentum/pytorch_model.bin", + config=config +) diff --git a/demo_api/coherence_momentum/model_card/coherence_momentum.json b/demo_api/coherence_momentum/model_card/coherence_momentum.json new file mode 100644 index 0000000..73bae60 --- /dev/null +++ b/demo_api/coherence_momentum/model_card/coherence_momentum.json @@ -0,0 +1,36 @@ +{ + "name": "CoherenceMomentum", + "languages": "English", + "description": "This is a neural network model that makes use of a momentum encoder and hard negative mining during training. This model is able to take in a piece of text and output a coherence score. The coherence score is only meant for comparison, i.e. it is only meaningful when used to compare between two texts, and the text with the higher coherence score is deemed to be more coherent by the model.", + "paper": { + "text": "Jwalapuram, P., Joty, S., & Lin, X. (2022). Rethinking Self-Supervision Objectives for Generalizable Coherence Modeling. Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), May 2022 (pp. 6044-6059).", + "url": "https://aclanthology.org/2022.acl-long.418/" + }, + "trainingDataset": "Permuted dataset derived from Linguistic Data Consortium's (LDC) Wall Street Journal (WSJ) dataset. Please contact the authors to get the dataset if you have a valid LDC license.", + "evaluationDataset": "Permuted dataset derived from Linguistic Data Consortium's (LDC) Wall Street Journal (WSJ) dataset. 
Please contact the authors to get the dataset if you have a valid LDC license.", + "evaluationScores": "0.988 accuracy on permuted WSJ dataset. 0.986 accuracy reported by authors on permuted WSJ dataset.", + "trainingConfig": { + "text": "https://storage.googleapis.com/sgnlp/models/coherence_momentum/config.json", + "url": "https://storage.googleapis.com/sgnlp/models/coherence_momentum/config.json" + }, + "trainingTime": "~24 hours for ~46000 steps (batch size of 1) on a single A100 GPU", + "modelWeights": { + "text": "https://storage.googleapis.com/sgnlp/models/coherence_momentum/pytorch_model.bin", + "url": "https://storage.googleapis.com/sgnlp/models/coherence_momentum/pytorch_model.bin" + }, + "modelInput": "A paragraph of text. During training, each positive example can be paired with one or more negative examples.", + "modelOutput": "Coherence score for the input text.", + "modelSize": "~930MB", + "inferenceInfo": "Not available.", + "usageScenarios": "Essay scoring, summarization, language generation.", + "originalCode": { + "text": "https://github.com/ntunlp/coherence-paradigm", + "url": "https://github.com/ntunlp/coherence-paradigm" + }, + "license": { + "text": "MIT License", + "url": "https://choosealicense.com/licenses/mit" + }, + "contact": "sg-nlp@aisingapore.org", + "additionalInfo": "Not applicable." +} \ No newline at end of file diff --git a/demo_api/coherence_momentum/requirements_dev.txt b/demo_api/coherence_momentum/requirements_dev.txt new file mode 100644 index 0000000..a32201d --- /dev/null +++ b/demo_api/coherence_momentum/requirements_dev.txt @@ -0,0 +1,3 @@ +-e. +flask +gunicorn \ No newline at end of file diff --git a/demo_api/coherence_momentum/usage.py b/demo_api/coherence_momentum/usage.py new file mode 100644 index 0000000..63ef79a --- /dev/null +++ b/demo_api/coherence_momentum/usage.py @@ -0,0 +1,33 @@ +from sgnlp.models.coherence_momentum import CoherenceMomentumModel, CoherenceMomentumConfig, \ + CoherenceMomentumPreprocessor + +config = CoherenceMomentumConfig.from_pretrained( + "https://storage.googleapis.com/sgnlp/models/coherence_momentum/config.json" +) +model = CoherenceMomentumModel.from_pretrained( + "https://storage.googleapis.com/sgnlp/models/coherence_momentum/pytorch_model.bin", + config=config +) + +preprocessor = CoherenceMomentumPreprocessor(config.model_size, config.max_len) + +text1 = "Companies listed below reported quarterly profit substantially different from the average of analysts ' " \ + "estimates . The companies are followed by at least three analysts , and had a minimum five-cent change in " \ + "actual earnings per share . Estimated and actual results involving losses are omitted . The percent " \ + "difference compares actual profit with the 30-day estimate where at least three analysts have issues " \ + "forecasts in the past 30 days . Otherwise , actual profit is compared with the 300-day estimate . " \ + "Source : Zacks Investment Research" +text2 = "The companies are followed by at least three analysts , and had a minimum five-cent change in actual " \ + "earnings per share . The percent difference compares actual profit with the 30-day estimate where at least " \ + "three analysts have issues forecasts in the past 30 days . Otherwise , actual profit is compared with the " \ + "300-day estimate . Source : Zacks Investment Research. Companies listed below reported quarterly profit " \ + "substantially different from the average of analysts ' estimates . Estimated and actual results involving " \ + "losses are omitted ." 
+
+text1_tensor = preprocessor([text1])
+text2_tensor = preprocessor([text2])
+
+text1_score = model.get_main_score(text1_tensor["tokenized_texts"]).item()
+text2_score = model.get_main_score(text2_tensor["tokenized_texts"]).item()
+
+print(text1_score, text2_score)

From 61dcd1ec035520ef627a4fa74cc69844ef18d764 Mon Sep 17 00:00:00 2001
From: Jonathan Heng
Date: Mon, 19 Sep 2022 16:26:52 +0800
Subject: [PATCH 17/20] Update demo api readme with example for dev build

---
 demo_api/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/demo_api/README.md b/demo_api/README.md
index 8e7a126..10f64ad 100644
--- a/demo_api/README.md
+++ b/demo_api/README.md
@@ -2,12 +2,13 @@
 ```
 # From root folder of repository:
 docker build -t <model_name> -f demo_api/<model_name>/Dockerfile demo_api/
-docker run -p 8000:8000 <model_name>
-E.g.
+# Example: Production build
 docker build -t lsr -f demo_api/lsr/Dockerfile demo_api/
-docker run -p 8000:8000 lsr
+
+# Example: Dev build
+docker build -t coherence_momentum -f demo_api/coherence_momentum/dev.Dockerfile .
 ```

 ## Notes on dev vs prod build

From e85c5d7e8816414e87a6e9daa58bdb686c98f43f Mon Sep 17 00:00:00 2001
From: Jonathan Heng
Date: Mon, 19 Sep 2022 16:27:06 +0800
Subject: [PATCH 18/20] Minor fix to lif model card

---
 demo_api/lif_3way_ap/model_card/lif_3way_ap.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/demo_api/lif_3way_ap/model_card/lif_3way_ap.json b/demo_api/lif_3way_ap/model_card/lif_3way_ap.json
index a66c09d..a482fc8 100644
--- a/demo_api/lif_3way_ap/model_card/lif_3way_ap.json
+++ b/demo_api/lif_3way_ap/model_card/lif_3way_ap.json
@@ -16,7 +16,8 @@
     },
     "evaluationScores": "0.745 F1 on test_i dataset. 0.75 F1 reported by authors in paper on test_i dataset.",
     "trainingConfig": {
-        "text": "https://storage.googleapis.com/sgnlp/models/lif_3way_ap/config.json"
+        "text": "https://storage.googleapis.com/sgnlp/models/lif_3way_ap/config.json",
+        "url": "https://storage.googleapis.com/sgnlp/models/lif_3way_ap/config.json"
     },
     "trainingTime": "~12 hours for 13 epochs on a single V100 GPU.",
     "modelWeights": {

From b20dcc3907e0e536cd127df8fb23c1da104f3880 Mon Sep 17 00:00:00 2001
From: Jonathan Heng
Date: Tue, 20 Sep 2022 11:10:56 +0800
Subject: [PATCH 19/20] [#57] Modify prod pipeline to allow deployment of dev builds into production for beta public testing

---
 jsonnet/demo-api.jsonnet     | 32 ++++++++++++++++++++++++++++++++
 jsonnet/dev-demo-api.jsonnet |  5 +++++
 2 files changed, 37 insertions(+)

diff --git a/jsonnet/demo-api.jsonnet b/jsonnet/demo-api.jsonnet
index 90a1185..9ee18df 100644
--- a/jsonnet/demo-api.jsonnet
+++ b/jsonnet/demo-api.jsonnet
@@ -16,6 +16,24 @@ local build_and_push_staging(module_name, image_name) = {
     ],
 };
 
+local build_and_push_dev_staging(module_name, image_name) = {
+  image: "registry.aisingapore.net/sg-nlp/sg-nlp-runner:latest",
+  stage: "build_and_push_staging",
+  tags: [
+    "on-prem",
+    "dind",
+  ],
+  when: "manual",
+  script: [
+    "echo 'Logging in to AISG Docker Registry...'",
+    "echo $STG_REGISTRY_PASSWORD | docker login registry.aisingapore.net -u $STG_DOCKER_USER --password-stdin",
+    "echo 'Building and pushing image...'",
+    "docker build --no-cache -t %s -f demo_api/%s/dev.Dockerfile ."
% [module_name, module_name], + "docker tag %s registry.aisingapore.net/sg-nlp/%s:latest" % [module_name, image_name], + "docker push registry.aisingapore.net/sg-nlp/%s:latest" % image_name, + ], +}; + local build_and_push_docs_staging() = { image: "python:3.8.11-slim", stage: "build_and_push_staging", @@ -154,6 +172,15 @@ local api_names = { } }; +// To deploy dev builds into production (for beta public testing) +local dev_api_names = { + "coherence_momentum": { + module_name: "coherence_momentum", + image_name: "coherence-momentum", + deployment_name: "coherence-momentum" + } +}; + { "stages": [ "build_and_push_staging", @@ -166,6 +193,11 @@ local api_names = { [api_names[key]["module_name"] + "_build_and_push_staging"]: build_and_push_staging(api_names[key]["module_name"], api_names[key]["image_name"]) for key in std.objectFields(api_names) +} + { + // Build and push dev staging + [dev_api_names[key]["module_name"] + "_build_and_push_dev_staging"]: + build_and_push_dev_staging(dev_api_names[key]["module_name"], dev_api_names[key]["image_name"]) + for key in std.objectFields(dev_api_names) } + { // Restart kubernetes staging [api_names[key]["module_name"] + "_restart_kubernetes_staging"]: diff --git a/jsonnet/dev-demo-api.jsonnet b/jsonnet/dev-demo-api.jsonnet index 3239732..25ac1ba 100644 --- a/jsonnet/dev-demo-api.jsonnet +++ b/jsonnet/dev-demo-api.jsonnet @@ -59,6 +59,11 @@ local api_names = { module_name: "ufd", image_name: "ufd", deployment_name: "ufd" + }, + "coherence_momentum": { + module_name: "coherence_momentum", + image_name: "coherence-momentum", + deployment_name: "coherence-momentum" } }; From f4cc3fbdf96f764982086acfc6f4dfeb8d566176 Mon Sep 17 00:00:00 2001 From: Jonathan Heng Date: Tue, 20 Sep 2022 14:25:18 +0800 Subject: [PATCH 20/20] Fix broken tests from updates to transformers package --- sgnlp/models/emotion_entailment/modeling.py | 30 +++++++++++++++++-- sgnlp/models/span_extraction/modeling.py | 32 +++++++++++++++++++-- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/sgnlp/models/emotion_entailment/modeling.py b/sgnlp/models/emotion_entailment/modeling.py index 2eb7c78..5d2b9fc 100644 --- a/sgnlp/models/emotion_entailment/modeling.py +++ b/sgnlp/models/emotion_entailment/modeling.py @@ -1,3 +1,6 @@ +from typing import Optional + +import torch from transformers import RobertaForSequenceClassification @@ -34,5 +37,28 @@ class RecconEmotionEntailmentModel(RobertaForSequenceClassification): def __init__(self, config): super().__init__(config) - def forward(self, **kwargs): - return super().forward(**kwargs) + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.FloatTensor] = None, + token_type_ids: Optional[torch.LongTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ): + return super().forward( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + labels=labels, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) diff --git a/sgnlp/models/span_extraction/modeling.py b/sgnlp/models/span_extraction/modeling.py index 473585d..07f1033 
100644 --- a/sgnlp/models/span_extraction/modeling.py +++ b/sgnlp/models/span_extraction/modeling.py @@ -1,3 +1,6 @@ +from typing import Optional + +import torch from transformers import BertForQuestionAnswering @@ -43,5 +46,30 @@ class RecconSpanExtractionModel(BertForQuestionAnswering): def __init__(self, config): super().__init__(config) - def forward(self, **kwargs): - return super().forward(**kwargs) + def forward( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + token_type_ids: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + head_mask: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.Tensor] = None, + start_positions: Optional[torch.Tensor] = None, + end_positions: Optional[torch.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ): + return super().forward( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + start_positions=start_positions, + end_positions=end_positions, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + )
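
For a quick end-to-end check of the demo API introduced in these patches, the sketch below posts two texts to the `/predict` route defined in `demo_api/coherence_momentum/api.py` and prints the returned scores. It is a minimal illustration only: it assumes the service is already running and reachable on port 8000 (for example via the dev image built above) and that the `requests` package is installed; the host, port, and sample texts are illustrative and not part of the patch series.

```python
# Minimal client sketch for the /predict endpoint (assumed to be served locally on port 8000).
import requests

payload = {
    "text1": "Company profits rose last quarter. Analysts had expected a smaller increase.",
    "text2": "Analysts had expected a smaller increase. Company profits rose last quarter.",
}

# The endpoint returns a JSON object with "text1_score" and "text2_score",
# as defined in demo_api/coherence_momentum/api.py.
response = requests.post("http://localhost:8000/predict", json=payload)
response.raise_for_status()

scores = response.json()
# Scores are only meaningful relative to each other: the higher-scoring text
# is the one the model judges to be more coherent.
print(scores["text1_score"], scores["text2_score"])
```

This mirrors the bundled `usage.py` example, but exercises the Flask layer and the model together, which is the path the staging pipeline deploys.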