microsoft · mokarian · Jul 2, 2020 · Jul 2, 2020 · Jul 2, 2020 · Jul 2, 2020
diff --git a/.env.example b/.env.example
@@ -69,13 +69,16 @@ AML_CLUSTER_MAX_NODES_SCORING = '4'
 AML_CLUSTER_MIN_NODES_SCORING = '0'
 AML_CLUSTER_PRIORITY_SCORING = 'lowpriority'
 AML_REBUILD_ENVIRONMENT_SCORING = 'true'
-BATCHSCORE_SCRIPT_PATH = 'scoring/parallel_batchscore.py'
-BATCHSCORE_COPY_SCRIPT_PATH = 'scoring/parallel_batchscore_copyoutput.py'
+BATCHSCORE_SCRIPT_PATH = 'diabetes_regression/scoring/parallel_batchscore.py'
+BATCHSCORE_COPY_SCRIPT_PATH = 'diabetes_regression/scoring/parallel_batchscore_copyoutput.py'
 
 
 SCORING_DATASTORE_INPUT_CONTAINER = 'input'
 SCORING_DATASTORE_INPUT_FILENAME = 'diabetes_scoring_input.csv'
 SCORING_DATASTORE_OUTPUT_CONTAINER = 'output'
 SCORING_DATASTORE_OUTPUT_FILENAME = 'diabetes_scoring_output.csv'
 SCORING_DATASET_NAME = 'diabetes_scoring_ds'
-SCORING_PIPELINE_NAME = 'diabetes-scoring-pipeline'
+SCORING_PIPELINE_NAME = 'diabetes-scoring-pipeline'
+
+# Observability
+APP_INSIGHTS_CONNECTION_STRING = ''
diff --git a/.gitignore b/.gitignore
@@ -109,3 +109,6 @@ condaenv.*
 .mypy_cache/
 
 .DS_Store
+
+# PyCharm / JetBrains IDE project settings
+.idea
diff --git a/.pipelines/diabetes_regression-batchscoring-ci.yml b/.pipelines/diabetes_regression-batchscoring-ci.yml
@@ -68,6 +68,8 @@ stages:
           python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline
       env:
         SCORING_DATASTORE_ACCESS_KEY: $(SCORING_DATASTORE_ACCESS_KEY)
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+
 
   - job: "Run_Batch_Score_Pipeline"
     displayName: "Run Batch Scoring Pipeline"

diff --git a/.pipelines/diabetes_regression-cd.yml b/.pipelines/diabetes_regression-cd.yml
@@ -55,7 +55,7 @@ stages:
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
         scriptLocation: inlineScript
-        workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring
+        workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/diabetes_regression/scoring
         inlineScript: |
           set -e # fail on error
 
@@ -101,7 +101,7 @@ stages:
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
         scriptLocation: inlineScript
-        workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring
+        workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/diabetes_regression/scoring
         inlineScript: |
           set -e # fail on error
 
@@ -111,6 +111,8 @@ stages:
           --dc deployment_config_aks.yml \
           -g $(RESOURCE_GROUP) --workspace-name $(WORKSPACE_NAME) \
           --overwrite -v
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
     - task: AzureCLI@1
       displayName: 'Smoke test'
       inputs:
@@ -120,6 +122,8 @@ stages:
           set -e # fail on error
           export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
           python -m ml_service.util.smoke_test_scoring_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)"
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
 
 - stage: 'Deploy_Webapp'
   displayName: 'Deploy to Webapp'
@@ -138,8 +142,8 @@ stages:
     - template: diabetes_regression-package-model-template.yml
       parameters:
         modelId: $(MODEL_NAME):$(get_model.MODEL_VERSION)
-        scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/score.py'
-        condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/conda_dependencies.yml'
+        scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/diabetes_regression/scoring/score.py'
+        condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/diabetes_regression/conda_dependencies.yml'
     - script: echo $(IMAGE_LOCATION) >image_location.txt
       displayName: "Write image location file"
     - task: AzureWebAppContainer@1
@@ -159,3 +163,5 @@ stages:
           set -e # fail on error
           export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
           python -m ml_service.util.smoke_test_scoring_service --type Webapp --service "$(WebAppDeploy.AppServiceApplicationUrl)/score"
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
diff --git a/.pipelines/diabetes_regression-ci.yml b/.pipelines/diabetes_regression-ci.yml
@@ -45,6 +45,8 @@ stages:
           # Invoke the Python building and publishing a training pipeline
           python -m ml_service.pipelines.diabetes_regression_build_train_pipeline
       displayName: 'Publish Azure Machine Learning Pipeline'
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
 
 - stage: 'Trigger_AML_Pipeline'
   displayName: 'Train and evaluate model'
@@ -70,6 +72,8 @@ stages:
           # Set AMLPIPELINEID variable for next AML Pipeline task in next job
           AMLPIPELINEID="$(cat pipeline_id.txt)"
           echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
       name: 'getpipelineid'
       displayName: 'Get Pipeline ID'
   - job: "Run_ML_Pipeline"
@@ -87,6 +91,8 @@ stages:
         PipelineId: '$(AMLPIPELINE_ID)'
         ExperimentName: '$(EXPERIMENT_NAME)'
         PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}'
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
   - job: "Training_Run_Report"
     dependsOn: "Run_ML_Pipeline"
     condition: always()

diff --git a/.pipelines/diabetes_regression-get-model-id-artifact-template.yml b/.pipelines/diabetes_regression-get-model-id-artifact-template.yml
@@ -27,6 +27,8 @@ steps:
         runId: '${{ parameters.artifactBuildId }}'
       runBranch: '$(Build.SourceBranch)'
       path: $(Build.SourcesDirectory)/bin
+    env:
+      APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
   - task: Bash@3
     name: get_model
     displayName: Parse Json for Model Name and Version

diff --git a/.pipelines/diabetes_regression-publish-model-artifact-template.yml b/.pipelines/diabetes_regression-publish-model-artifact-template.yml
@@ -25,5 +25,7 @@ steps:
       echo $FOUND_MODEL >model.json
   name: 'getversion'
   displayName: "Determine if evaluation succeeded and new model is registered (CLI)"
+  env:
+    APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
 - publish: model.json
   artifact: model
diff --git a/.pipelines/diabetes_regression-variables-template.yml b/.pipelines/diabetes_regression-variables-template.yml
@@ -3,19 +3,19 @@ variables:
   # Source Config
   # The directory containing the scripts for training, evaluating, and registering the model
   - name: SOURCES_DIR_TRAIN
-    value: diabetes_regression
+    value: .
     # The path to the model training script under SOURCES_DIR_TRAIN
   - name: TRAIN_SCRIPT_PATH
-    value: training/train_aml.py
+    value: diabetes_regression/training/train_aml.py
     # The path to the model evaluation script under SOURCES_DIR_TRAIN
   - name: EVALUATE_SCRIPT_PATH
-    value: evaluate/evaluate_model.py
+    value: diabetes_regression/evaluate/evaluate_model.py
     # The path to the model registration script under SOURCES_DIR_TRAIN
   - name: REGISTER_SCRIPT_PATH
-    value: register/register_model.py
+    value: diabetes_regression/register/register_model.py
     # The path to the model scoring script relative to SOURCES_DIR_TRAIN
   - name: SCORE_SCRIPT
-    value: scoring/score.py
+    value: diabetes_regression/scoring/score.py
 
 
   # Azure ML Variables
@@ -66,8 +66,8 @@ variables:
   #   value: "true"
 
   # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yaml.
-  # - name: AML_REBUILD_ENVIRONMENT
-  #  value: "false"
+#  - name: AML_REBUILD_ENVIRONMENT
+#    value: "true"
 
   # Variables below are used for controlling various aspects of batch scoring
   - name: USE_GPU_FOR_SCORING
@@ -95,9 +95,9 @@ variables:
     value: lowpriority
   # The path to the batch scoring script relative to SOURCES_DIR_TRAIN
   - name: BATCHSCORE_SCRIPT_PATH
-    value: scoring/parallel_batchscore.py
+    value: diabetes_regression/scoring/parallel_batchscore.py
   - name: BATCHSCORE_COPY_SCRIPT_PATH
-    value: scoring/parallel_batchscore_copyoutput.py
+    value: diabetes_regression/scoring/parallel_batchscore_copyoutput.py
   # Flag to allow rebuilding the AML Environment after it was built for the first time. 
   # This enables dependency updates from the conda dependencies yaml for scoring activities.
   - name: AML_REBUILD_ENVIRONMENT_SCORING
@@ -126,4 +126,7 @@ variables:
   # Scoring pipeline name
   - name: SCORING_PIPELINE_NAME
     value: "diabetes-scoring-pipeline"
-
+
+  # Observability
+  - name: APP_INSIGHTS_CONNECTION_STRING
+    value: ""
diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml
@@ -27,3 +27,6 @@ dependencies:
       - flake8==3.7.*
       - flake8_formatter_junit_xml==0.0.*
       - azure-cli==2.3.*
+      - opencensus==0.7.7
+      - opencensus-context==0.1.1
+      - opencensus-ext-azure==1.0.2
diff --git a/diabetes_regression/conda_dependencies.yml b/diabetes_regression/conda_dependencies.yml
@@ -37,3 +37,12 @@ dependencies:
 
       # MLOps with R
       - azure-storage-blob
+
+      # Observability
+      - opencensus==0.7.7
+      - opencensus-context==0.1.1
+      - opencensus-ext-azure==1.0.2
+      - python-dotenv==0.10.3
+
+      # Data Classes
+      - dataclasses
diff --git a/diabetes_regression/conda_dependencies_scorecopy.yml b/diabetes_regression/conda_dependencies_scorecopy.yml
@@ -29,3 +29,12 @@ dependencies:
 
       # Score copying deps
       - azure-storage-blob
+
+      # Observability
+      - opencensus==0.7.7
+      - opencensus-context==0.1.1
+      - opencensus-ext-azure==1.0.2
+      - python-dotenv==0.10.3
+
+      # Data Classes
+      - dataclasses
diff --git a/diabetes_regression/conda_dependencies_scoring.yml b/diabetes_regression/conda_dependencies_scoring.yml
@@ -30,3 +30,12 @@ dependencies:
       # Scoring deps
       - scikit-learn
       - pandas
+
+      # Observability
+      - opencensus==0.7.7
+      - opencensus-context==0.1.1
+      - opencensus-ext-azure==1.0.2
+      - python-dotenv==0.10.3
+
+      # Data Classes
+      - dataclasses
diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py
@@ -26,7 +26,11 @@
 from azureml.core import Run
 import argparse
 import traceback
-from util.model_helper import get_model
+from diabetes_regression.util.model_helper import get_model
+from utils.logger.logger_interface import Severity
+from utils.logger.observability import Observability
+
+observability = Observability()
 
 run = Run.get_context()
 
@@ -42,7 +46,7 @@
 #     load_dotenv()
 #     sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
 #     if (sources_dir is None):
-#         sources_dir = 'diabetes_regression'
+#         sources_dir = '.'
 #     path_to_util = os.path.join(".", sources_dir, "util")
 #     sys.path.append(os.path.abspath(path_to_util))  # NOQA: E402
 #     from model_helper import get_model
@@ -89,7 +93,8 @@
 parser.add_argument(
     "--allow_run_cancel",
     type=str,
-    help="Set this to false to avoid evaluation step from cancelling run after an unsuccessful evaluation",  # NOQA: E501
+    help="Set this to false to avoid evaluation step from cancelling "
+         "run after an unsuccessful evaluation",
     default="true",
 )
 
@@ -109,42 +114,47 @@
     tag_name = 'experiment_name'
 
     model = get_model(
-                model_name=model_name,
-                tag_name=tag_name,
-                tag_value=exp.name,
-                aml_workspace=ws)
+        model_name=model_name,
+        tag_name=tag_name,
+        tag_value=exp.name,
+        aml_workspace=ws)
 
     if (model is not None):
         production_model_mse = 10000
         if (metric_eval in model.tags):
             production_model_mse = float(model.tags[metric_eval])
         new_model_mse = float(run.parent.get_metrics().get(metric_eval))
         if (production_model_mse is None or new_model_mse is None):
-            print("Unable to find", metric_eval, "metrics, "
-                  "exiting evaluation")
-            if((allow_run_cancel).lower() == 'true'):
+            observability.log("Unable to find {} metrics, "  # keep the spaces
+                              "exiting evaluation".format(metric_eval))
+            if ((allow_run_cancel).lower() == 'true'):
                 run.parent.cancel()
         else:
-            print(
+            observability.log(
                 "Current Production model mse: {}, "
                 "New trained model mse: {}".format(
                     production_model_mse, new_model_mse
                 )
             )
 
         if (new_model_mse < production_model_mse):
-            print("New trained model performs better, "
-                  "thus it should be registered")
+            observability.log("New trained model performs better, "
+                              "thus it should be registered")
         else:
-            print("New trained model metric is worse than or equal to "
-                  "production model so skipping model registration.")
-            if((allow_run_cancel).lower() == 'true'):
+            observability.log("New trained model metric is worse "
+                              "than or equal to "
+                              "production model so skipping "
+                              "model registration.")
+            if ((allow_run_cancel).lower() == 'true'):
                 run.parent.cancel()
     else:
-        print("This is the first model, "
-              "thus it should be registered")
+        observability.log("This is the first model, "
+                          "thus it should be registered")
 
 except Exception:
     traceback.print_exc(limit=None, file=None, chain=True)
-    print("Something went wrong trying to evaluate. Exiting.")
+    observability.log(
+        description="Something went wrong trying to evaluate. Exiting.",
+        severity=Severity.ERROR)
+
     raise