Initial commit for Text-sentiment-classifier #8

Open · wants to merge 8 commits into master

121 changes: 121 additions & 0 deletions bert-text-classifier/README.md
@@ -0,0 +1,121 @@
# MAX for TensorFlow.js: Text Sentiment Classifier

This is a TensorFlow.js port of the [MAX Text Sentiment Classifier](https://developer.ibm.com/exchanges/models/all/max-text-sentiment-classifier/). The model detects whether a text fragment leans towards a positive or a negative sentiment.

## Install

### Browser

```html
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
<script src="https://cdn.jsdelivr.net/npm/@codait/text-sentiment-classifier"></script>
```

### Node.js

```sh
npm install --save @codait/text-sentiment-classifier
```

## Usage

The complete examples for browser and Node.js environments are in the [`/examples`](https://github.com/CODAIT/max-tfjs-models/tree/master/text-sentiment-classifier/examples) directory.

### Browser

> **Note**: _When loaded in a browser, the global variable `textSentimentClassifier` will be available to access the API._

```javascript
textSentimentClassifier
.predict("i like strawberries")
.then(prediction => {
console.log(prediction)
});
```

### Node.js

```javascript
const tc = require('@codait/text-sentiment-classifier');
tc.predict("i like strawberries").then(res => console.log(res)); // { pos: 0.9981953501701355, neg: 0.0018045296892523766 }
```

### API

- **loadModel()**

Loads the model files.

When running in Node.js, the first call downloads the model assets into the local `/model` directory; subsequent calls load the model from that directory.

Returns the TensorFlow.js model.

- **processInput(text)**

Processes the input text into the shape and format expected by the model.

`text` - sentence to be processed. Ending the sentence with a period is recommended but not required.

Returns a named tensor map that contains:
`{'segment_ids_1': Tensor of shape [128],
'input_ids_1': Tensor of shape [128],
'input_mask_1': Tensor of shape [128]}`

- **runInference(inputFeatures)**

Runs inference on the given named tensor map. The output is a tensor containing the softmax scores for the positive and negative sentiments.

`inputFeatures` - a named tensor map representation of the text.

Returns the inference results as a 1D tensor.

- **processOutput(tensor)**

Transforms the inference output into a JSON object.

`tensor` - the model output from running inference.

Returns an object containing: `{neg: number, pos: number}`


- **predict(text)**

Loads the model, processes the input text, runs inference, processes the inference output, and returns a prediction object. This is a convenience function that avoids calling each of the functions (`loadModel`, `processInput`, `runInference`, `processOutput`) individually; a step-by-step sketch using the individual functions follows this list.

`text` - sentence to be analyzed. Ending the sentence with a period is recommended but not required.

Returns an object containing: `{neg: number, pos: number}`

- **encode(text)**

Tokenizes the text into token ids using the BERT 32k-token vocabulary.

`text` - sentence to be encoded.

Returns an array of BERT token ids.

- **idsToTokens(ids)**

Transforms the BERT token ids into tokens.

`ids` - BERT token ids.

Returns an array of BERT tokens.

- **version**

Returns the version of the package.
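
Using the step-by-step functions directly, the following is a minimal Node.js sketch of the same pipeline that `predict` performs. It is based only on the API described above; whether each function returns a promise is not documented here, so the sketch `await`s each call defensively (awaiting a non-promise value is harmless).

```javascript
const tc = require('@codait/text-sentiment-classifier');

async function analyze(text) {
  // In Node.js the first call downloads the model assets into /model;
  // later calls load them from that directory.
  await tc.loadModel();

  // Named tensor map with segment_ids_1, input_ids_1 and input_mask_1,
  // each a tensor of shape [128].
  const inputFeatures = await tc.processInput(text);

  // 1D tensor containing the softmax of the positive and negative scores.
  const outputTensor = await tc.runInference(inputFeatures);

  // { neg: number, pos: number }
  return tc.processOutput(outputTensor);
}

analyze('i like strawberries.').then(result => console.log(result));
```

The tokenization can be inspected the same way: in `examples/test.js`, `tc.encode(text)` is consumed with `.then`, so the BERT token ids can be retrieved with `await tc.encode(text)` and passed to `idsToTokens` to map them back to tokens.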

## Model

The model assets produced by converting the pre-trained model to the TensorFlow.js format can be found in the `/model` directory after `loadModel()` is called in Node.js.

## Resources

- [MAX Text Sentiment Classifier](https://developer.ibm.com/exchanges/models/all/max-text-sentiment-classifier/)

## License

[Apache-2.0](https://github.com/CODAIT/max-tfjs-models/blob/master/LICENSE)
44 changes: 44 additions & 0 deletions bert-text-classifier/examples/test.html
@@ -0,0 +1,44 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>text classifier</title>
</head>
<body onload="loadModel()">
<h1>Text Sentiment Classifier</h1>
<div id="div1">
<input type="text" id="text_input" name="text_input" value="Enter a sentence.">
<input type="button" id="submit_text" value="Analyze">
</div>
<p>
<label for="status">status:</label>
<output name="status" id="status">Loading Model...</output>
</p>
</body>
<script src="../dist/src/max.sentimentclass.js"></script>
<script>
const submitButton = document.getElementById("submit_text");
const statusElement = document.getElementById('status');
submitButton.addEventListener("click", runPredict);
function loadModel(){
submitButton.setAttribute('disabled','true')
textSentimentClassifier.loadModel().then( () =>{
submitButton.removeAttribute('disabled');
updateStatus("Model Loaded", append=false);
})
}
function updateStatus(msg, append=true) {
  if (append) {
    statusElement.innerHTML += msg;
  } else {
    statusElement.innerHTML = msg;
  }
}
async function runPredict(){
const text = document.getElementById("text_input").value;
textSentimentClassifier.predict(text).then((res) =>{
updateStatus(`
<br> ${text}
<br>Positive &#128512;: ${res['pos'].toFixed(4)}
<br>Negative &#128534;: ${res['neg'].toFixed(4)}<br>`);
});
}
</script>
</html>
3 changes: 3 additions & 0 deletions bert-text-classifier/examples/test.js
@@ -0,0 +1,3 @@
const tc = require("../dist/src/max.sentimentclass.cjs.js");
tc.predict("i like strawberries").then(res=>console.log(res));
tc.encode("i like strawberries").then(res=>console.log(res));
81 changes: 81 additions & 0 deletions bert-text-classifier/package.json
@@ -0,0 +1,81 @@
{
"name": "@codait/text-sentiment-classifier",
"version": "0.1.0",
"description": "This model is able to detect whether a text fragment leans towards a positive or a negative sentiment.",
"main": "dist/src/max.sentimentclass.cjs.js",
"module": "dist/src/max.sentimentclass.es.js",
"jsdelivr": "dist/src/max.sentimentclass.js",
"unpkg": "dist/max.sentimentclass.js",
"scripts": {
"clean": "rimraf dist && mkdirp dist",
"rollup": "rollup --config rollup.config.js",
"copy": "ncp model dist/model",
"build": "npm run clean && npm run rollup",
"test": "npm run build && jasmine-ts",
"watch:js": "rollup --config rollup.config.js --watch",
"watch:ts": "tsc --watch",
"dev": "concurrently -c \"bgBlue.bold,bgMagenta.bold\" \"npm:watch:js\" \"npm:watch:ts\""
},
"files": [
"dist",
"model",
"src"
],
"author": "Ted Chang (https://github.com/tedhtchang/)",
"license": "Apache-2.0",
"keywords": [
"bert text sentiment classifier",
"model asset exchange",
"max",
"tensorflow.js",
"tensorflowjs",
"tensorflow",
"tf.js",
"tfjs",
"machine learning",
"Wordpiece tokenizer"
],
"repository": {
"type": "git",
"url": "git+https://github.com/CODAIT/max-tfjs-models.git"
},
"bugs": {
"url": "https://github.com/CODAIT/max-tfjs-models/issues"
},
"homepage": "https://github.com/CODAIT/max-tfjs-models/tree/master/bert-text-classifier#readme",
"dependencies": {
"@tensorflow-models/universal-sentence-encoder": "^1.2.1",
"@tensorflow/tfjs": "^1.5.1",
"@tensorflow/tfjs-node": "^1.5.1",
"@types/express": "^4.16.1",
"@types/tar": "^4.0.3",
"express": "^4.17.1",
"node-fetch": "^2.6.0",
"numpy": "0.0.1",
"tar": "^5.0.5"
},
"devDependencies": {
"@types/jasmine": "^3.5.0",
"@types/rollup-plugin-json": "^3.0.2",
"builtin-modules": "^3.1.0",
"concurrently": "^4.1.0",
"jasmine": "^3.5.0",
"jasmine-ts": "^0.3.0",
"mkdirp": "^0.5.1",
"ncp": "^2.0.0",
"rimraf": "^2.6.3",
"rollup": "^1.13.1",
"rollup-plugin-json": "^4.0.0",
"rollup-plugin-node-resolve": "^5.0.1",
"rollup-plugin-replace": "^2.2.0",
"rollup-plugin-typescript2": "^0.21.1",
"ts-node": "^8.5.4",
"typescript": "^3.7.3",
"unicode-12.1.0": "^0.8.0"
},
"standard": {
"ignore": [
"dist"
]
}
}
78 changes: 78 additions & 0 deletions bert-text-classifier/rollup.config.js
@@ -0,0 +1,78 @@
import node from 'rollup-plugin-node-resolve';
import typescript from 'rollup-plugin-typescript2';
import json from 'rollup-plugin-json';
import builtins from 'builtin-modules';
import replace from 'rollup-plugin-replace';

const jsonPlugin = json({
include: './package.json',
preferConst: true,
indent: ' ',
compact: true,
namedExports: ['version']
})

export default [
{
input: 'src/text-sentiment-classifier.ts',
output: [
{
name: 'textSentimentClassifier',
file: 'dist/src/max.sentimentclass.js',
format: 'iife',
sourcemap: true
},
{
name: 'textSentimentClassifier',
file: 'dist/src/max.sentimentclass.es.js',
format: 'es',
sourcemap: true
}
],
plugins: [
typescript({
clean: true,
tsconfigOverride: {
compilerOptions: {
module: 'ES2015',
noUnusedLocals: false,
inlineSourceMap: false
}
}
}),
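// Assumption: rewrite the import path so the browser bundles use the
// js implementation of sentimentanalysis instead of the server one.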
replace({
'server/sentimentanalysis': 'js/sentimentanalysis',
include: ['src/text-sentiment-classifier.ts']
}),
jsonPlugin,
node(),
],
external: builtins
},
{
input: 'src/text-sentiment-classifier.ts',
output:[
{
name: 'textSentimentClassifier',
file: 'dist/src/max.sentimentclass.cjs.js',
format: 'cjs',
sourcemap: true
},
],
plugins: [
typescript({
clean: true,
tsconfigOverride: {
compilerOptions: {
module: 'ES2015',
noUnusedLocals: false,
inlineSourceMap: false
}
}
}),
jsonPlugin,
node()
],
external: builtins
}
]
3 changes: 3 additions & 0 deletions bert-text-classifier/spec/support/jasmine.json
@@ -0,0 +1,3 @@
{
"spec_files":["test/test.ts"]
}
51 changes: 51 additions & 0 deletions bert-text-classifier/src/js/sentimentanalysis.ts
@@ -0,0 +1,51 @@
import WordPieceTokenizer from "../tokenization";
import * as tf from '@tensorflow/tfjs';

const vocabUrl = 'https://s3.us-south.cloud-object-storage.appdomain.cloud/max-assets-prod/max-text-sentiment-classifier/tfjs/0.1.0/vocab.json'
const modelUrl = 'https://s3.us-south.cloud-object-storage.appdomain.cloud/max-assets-prod/max-text-sentiment-classifier/tfjs/0.1.0/model.json';


export default class SentimentAnalysis {
private _model: tf.GraphModel;
private _tokenizer: WordPieceTokenizer;

public get tokenizer(): WordPieceTokenizer {
return this._tokenizer;
}

public get model() : tf.GraphModel {
return this._model;
}

async init(){
if(! this.model) await this.loadModel();
if(! this.tokenizer) await this.loadTokenizer();
}

async loadModel(){
this._model = await tf.loadGraphModel(
modelUrl, {requestInit: {headers: {"origin": "localhost"}}})
// console.log(`Model loaded from ${modelUrl}.`);
}

async loadTokenizer(){
this._tokenizer = new WordPieceTokenizer(true);
await this.tokenizer.init(vocabUrl);
// console.log("Tokenizer loaded.")
}
/**
 * Classify a text input and return a tensor containing the softmax
 * scores for the positive and negative sentiments.
 */
async analyzeText(text: string){
return await this.inference(await this.tokenizer.inputFeature(text));
}

/**
 * Run the graph model on the tokenized input features and return a
 * 1D tensor with the softmax scores for the positive and negative classes.
 */
async inference(feature: tf.NamedTensorMap){
if (! this.model) await this.loadModel();
return tf.tidy(() => {
let pred: tf.Tensor = this.model.execute({...feature}, 'loss/Softmax') as tf.Tensor;
return pred.squeeze([0]);
});
}
}