Merge branch 'main' into tvm-0.12.0

microsoft · Jan 29, 2024 · 5594e20
2 parents d7565ca + 2036a4a
commit 5594e20
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 34 deletions.
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -30,9 +30,9 @@ jobs:
       TVM_VERSION_TAG: v0.15.0
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Lint with flake8
@@ -128,18 +128,18 @@ jobs:
         python setup.py bdist_wheel
         python -m pip install dist/tvm-*.whl
 
-    # We don't run pytest for Linux py3.8 since we do coverage for that case.
+    # We don't run pytest for Linux py3.9 since we do coverage for that case.
     - name: Test with pytest
-      if: ${{ matrix.python-version != '3.8' || startsWith(matrix.os, 'ubuntu') != true }}
-      run: pytest -v
-    # Run and push coverage only for Linux py3.8
-    - name: Coverage 3.8 Linux
-      if: ${{ matrix.python-version == '3.8' && startsWith(matrix.os, 'ubuntu') }}
+      if: ${{ matrix.python-version != '3.9' || startsWith(matrix.os, 'ubuntu') != true }}
+      run: pytest
+    # Run and push coverage only for Linux py3.9
+    - name: Coverage 3.9 Linux
+      if: ${{ matrix.python-version == '3.9' && startsWith(matrix.os, 'ubuntu') }}
       run: |
         coverage run -a -m pytest tests -v
         coverage xml
     - name: Upload coverage to Codecov
-      if: ${{ matrix.python-version == '3.8' && startsWith(matrix.os, 'ubuntu') }}
+      if: ${{ matrix.python-version == '3.9' && startsWith(matrix.os, 'ubuntu') }}
       uses: codecov/codecov-action@v3
       with:
         file: ./coverage.xml
@@ -151,14 +151,15 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
-        python-version: ['3.8']
+        python-version: ['3.9']
     permissions: # here we push documentation
       actions: read
-      deployments: write
+      contents: write
+    if: github.ref == 'refs/heads/main' # only run this on main after PR merged
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Generate Documentation
@@ -170,7 +171,6 @@ jobs:
         # now refresh the code
         git checkout . && python -m pip install -e .[tests] -f https://download.pytorch.org/whl/torch_stable.html
     - name: Deploy to GitHub pages 🚀
-      if: ${{ github.ref == 'refs/heads/main' && matrix.python-version == '3.8' && startsWith(matrix.os, 'ubuntu') }}
       uses: JamesIves/github-pages-deploy-action@v4
       with:
         branch: gh-pages

diff --git a/hummingbird/ml/_parse.py b/hummingbird/ml/_parse.py
@@ -21,7 +21,7 @@
 from sklearn.ensemble import BaggingClassifier, BaggingRegressor
 from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
 from sklearn.multioutput import MultiOutputRegressor, RegressorChain
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
 
 from .containers import CommonSklearnModelContainer, CommonONNXModelContainer, CommonSparkMLModelContainer
 from ._topology import Topology
@@ -35,7 +35,7 @@
 except ImportError:
     StackingClassifier = None
 
-do_not_merge_columns = tuple(filter(lambda op: op is not None, [OneHotEncoder, ColumnTransformer]))
+do_not_merge_columns = tuple(filter(lambda op: op is not None, [OneHotEncoder, ColumnTransformer, FunctionTransformer]))
 
 
 def parse_sklearn_api_model(model, extra_config={}):
@@ -473,6 +473,26 @@ def _parse_sklearn_column_transformer(topology, model, inputs):
     return transformed_result_names
 
 
+def _parse_sklearn_function_transformer(topology, model, inputs):
+    """
+    Parse a FunctionTransformer step without adding an operator for the transformer itself:
+    a single input is returned unchanged, while several inputs are joined by one
+    ``SklearnConcat`` logical operator declared on the topology.
+
+    Taken from https://github.com/onnx/sklearn-onnx/blob/fdb52cec86d4d19401cc365db97650fd7692676b/skl2onnx/operator_converters/function_transformer.py#L10.  # noqa: E501
+    :param topology: Topology object
+    :param model: A *scikit-learn* *FunctionTransformer* object. It is not read by this parser.
+        NOTE(review): the callable wrapped by the transformer is never inspected here, so this
+        presumably expects identity transformers only -- confirm against callers.
+    :param inputs: A list of Variable objects
+    :return: Output produced by function transformer: ``inputs`` itself when it holds exactly
+        one variable, otherwise the outputs list of the newly declared concat operator
+    """
+    if len(inputs) == 1:
+        # apply identity
+        return inputs
+    else:
+        # apply concat
+        conc_op = topology.declare_logical_operator("SklearnConcat")
+        conc_op.inputs = inputs
+        conc_names = topology.declare_logical_variable("concat_inputs")
+        conc_op.outputs.append(conc_names)
+        return conc_op.outputs
+
+
 def _parse_sklearn_stacking(topology, model, inputs):
     """
     Taken from https://github.com/onnx/sklearn-onnx/blob/9939c089a467676f4ffe9f3cb91098c4841f89d8/skl2onnx/_parse.py#L238.
@@ -549,6 +569,7 @@ def _build_sklearn_api_parsers_map():
     # Parsers for edge cases are going here.
     map_parser = {
         ColumnTransformer: _parse_sklearn_column_transformer,
+        FunctionTransformer: _parse_sklearn_function_transformer,
         GridSearchCV: _parse_sklearn_model_selection,
         MultiOutputRegressor: _parse_sklearn_multi_output_regressor,
         pipeline.Pipeline: _parse_sklearn_pipeline,

diff --git a/hummingbird/ml/operator_converters/_imputer_implementations.py b/hummingbird/ml/operator_converters/_imputer_implementations.py
@@ -36,7 +36,7 @@ def __init__(self, logical_operator, device, statistics=None, missing=None, stra
         self.transformer = True
         self.do_mask = strategy == "constant" or all(b_mask)
         self.mask = torch.nn.Parameter(torch.LongTensor([] if self.do_mask else i_mask), requires_grad=False)
-        self.replace_values = torch.nn.Parameter(torch.tensor([stats_], dtype=torch.float32), requires_grad=False)
+        self.replace_values = torch.nn.Parameter(torch.tensor(np.array([stats_]), dtype=torch.float32), requires_grad=False)
 
         self.is_nan = True if (missing_values == "NaN" or np.isnan(missing_values)) else False
         if not self.is_nan:

diff --git a/tests/test_sklearn_bagging.py b/tests/test_sklearn_bagging.py
@@ -13,7 +13,7 @@
 class TestSklearnBagging(unittest.TestCase):
     def test_bagging_svc_1(self):
         X, y = make_classification(n_samples=100, n_features=4, n_informative=2, n_redundant=0, random_state=0, shuffle=False)
-        clf = BaggingClassifier(base_estimator=SVC(), n_estimators=10, random_state=0)
+        clf = BaggingClassifier(estimator=SVC(), n_estimators=10, random_state=0)
 
         clf.fit(X, y)
 
@@ -25,7 +25,7 @@ def test_bagging_svc_1(self):
 
     def test_bagging_svc(self):
         X, y = make_classification(n_samples=100, n_features=4, n_informative=2, n_redundant=0, random_state=0, shuffle=False)
-        clf = BaggingClassifier(base_estimator=SVC(), n_estimators=10, random_state=0)
+        clf = BaggingClassifier(estimator=SVC(), n_estimators=10, random_state=0)
 
         clf.fit(X, y)
 
@@ -37,7 +37,7 @@ def test_bagging_svc(self):
 
     def test_bagging_logistic_regression(self):
         X, y = make_classification(n_samples=100, n_features=4, n_informative=2, n_redundant=0, random_state=0, shuffle=False)
-        clf = BaggingClassifier(base_estimator=LogisticRegression(), n_estimators=10, random_state=0)
+        clf = BaggingClassifier(estimator=LogisticRegression(), n_estimators=10, random_state=0)
 
         clf.fit(X, y)
 
@@ -49,7 +49,7 @@ def test_bagging_logistic_regression(self):
 
     def test_bagging_linear_svr_1(self):
         X, y = make_regression(n_samples=1000, n_features=8, n_informative=5, n_targets=1, random_state=0, shuffle=True)
-        reg = BaggingRegressor(base_estimator=LinearSVR(), n_estimators=10, random_state=0)
+        reg = BaggingRegressor(estimator=LinearSVR(), n_estimators=10, random_state=0)
 
         reg.fit(X, y)
 
@@ -64,7 +64,7 @@ def test_bagging_linear_svr_1(self):
 
     def test_bagging_linear_svr(self):
         X, y = make_regression(n_samples=1000, n_features=8, n_informative=5, n_targets=1, random_state=0, shuffle=True)
-        reg = BaggingRegressor(base_estimator=LinearSVR(), n_estimators=10, random_state=0)
+        reg = BaggingRegressor(estimator=LinearSVR(), n_estimators=10, random_state=0)
 
         reg.fit(X, y)
 
@@ -76,7 +76,7 @@ def test_bagging_linear_svr(self):
 
     def test_bagging_linear_regression(self):
         X, y = make_regression(n_samples=1000, n_features=8, n_informative=5, n_targets=1, random_state=0, shuffle=True)
-        reg = BaggingRegressor(base_estimator=LinearRegression(), n_estimators=10, random_state=0)
+        reg = BaggingRegressor(estimator=LinearRegression(), n_estimators=10, random_state=0)
 
         reg.fit(X, y)
 

diff --git a/tests/test_sklearn_pipeline.py b/tests/test_sklearn_pipeline.py
@@ -148,7 +148,7 @@ def test_pipeline_column_transformer_string(self):
         # SimpleImputer on string is not available for string
         # in ONNX-ML specifications.
         # So we do it beforehand.
-        X["pclass"].fillna("missing", inplace=True)
+        X['pclass'] = X['pclass'].fillna('missing')
 
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
@@ -204,7 +204,7 @@ def test_pipeline_column_transformer(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -245,7 +245,7 @@ def test_pipeline_column_transformer_pandas(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -286,7 +286,7 @@ def test_pipeline_column_transformer_pandas_ts(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -327,7 +327,7 @@ def test_pipeline_column_transformer_weights(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -369,7 +369,7 @@ def test_pipeline_column_transformer_weights_pandas(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -411,7 +411,7 @@ def test_pipeline_column_transformer_drop(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -454,7 +454,7 @@ def test_pipeline_column_transformer_drop_noweights(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -496,7 +496,7 @@ def test_pipeline_column_transformer_passthrough(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -539,7 +539,7 @@ def test_pipeline_column_transformer_passthrough_noweights(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[
@@ -581,7 +581,7 @@ def test_pipeline_column_transformer_passthrough_slice(self):
 
         numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])
 
-        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore"))])
+        categorical_transformer = Pipeline(steps=[("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore"))])
 
         preprocessor = ColumnTransformer(
             transformers=[