From ae721030d7ccc09361270fdd6285737206c98964 Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 19:41:28 +0100
Subject: [PATCH 01/12] Add ~= operator to search using wildcards

---
 src/dataregistry/query.py            | 10 +++++++--
 tests/end_to_end_tests/test_query.py | 31 ++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/dataregistry/query.py b/src/dataregistry/query.py
index fa134fe7..a2c56b25 100644
--- a/src/dataregistry/query.py
+++ b/src/dataregistry/query.py
@@ -59,6 +59,7 @@
     "<=": "__le__",
     ">": "__gt__",
     ">=": "__ge__",
+    "~=": None,
 }
 
 ALL_ORDERABLE = (
@@ -272,12 +273,17 @@ def _render_filter(self, f, stmt):
 
         # Extract the property we are ordering on (also making sure it
         # is orderable)
-        if not column_is_orderable[0] and f[1] not in ["==", "=", "!="]:
+        if not column_is_orderable[0] and f[1] not in ["~=", "==", "=", "!="]:
             raise ValueError('check_filter: Cannot apply "{f[1]}" to "{f[0]}"')
         else:
             value = f[2]
 
-        return stmt.where(column_ref[0].__getattribute__(the_op)(value))
+        # Special case where we are partially matching with a wildcard
+        if f[1] == "~=":
+            return stmt.where(column_ref[0].ilike(value))
+        # General case 
+        else:
+            return stmt.where(column_ref[0].__getattribute__(the_op)(value))
 
     def _append_filter_tables(self, tables_required, filters):
         """
diff --git a/tests/end_to_end_tests/test_query.py b/tests/end_to_end_tests/test_query.py
index 92268e6e..3f9fb6f3 100644
--- a/tests/end_to_end_tests/test_query.py
+++ b/tests/end_to_end_tests/test_query.py
@@ -107,3 +107,34 @@ def test_query_between_columns(dummy_file):
             assert i < 1
             assert getattr(r, "dataset.name") == _NAME
             assert getattr(r, "dataset.version_string") == _V_STRING
+
+def test_query_name(dummy_file):
+    """Test a quering on a partial name'"""
+
+    # Establish connection to database
+    tmp_src_dir, tmp_root_dir = dummy_file
+    datareg = DataRegistry(root_dir=str(tmp_root_dir), schema=DEFAULT_SCHEMA_WORKING)
+
+    # Add entry
+    for tag in ["first", "second", "third"]:
+        d_id = _insert_dataset_entry(
+            datareg,
+            f"DESC:datasets:test_query_name_{tag}",
+            "0.0.1",
+        )
+
+    # Do a wildcard search on the name
+    f = datareg.Query.gen_filter("dataset.name", "~=", "DESC:datasets:test_query_name%")
+    results = datareg.Query.find_datasets(property_names=None, filters=[f])
+
+    # Should return all three of our datasets
+    for c, v in results.items():
+        assert len(v) == 3
+
+    # Do an exact search on the name
+    f = datareg.Query.gen_filter("dataset.name", "==", "DESC:datasets:test_query_name_first")
+    results = datareg.Query.find_datasets(property_names=None, filters=[f])
+
+    # Should return just one dataset
+    for c, v in results.items():
+        assert len(v) == 1

From a0c047b527b9047c83117123648b61a7e36b5a4c Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 20:23:19 +0100
Subject: [PATCH 02/12] Update dregs ls to allow for wildcard querying on the
 dataset name

---
 CHANGELOG.md                  | 10 ++++
 src/dataregistry_cli/cli.py   |  4 +-
 src/dataregistry_cli/query.py | 95 +++++++++++++++++++++--------------
 3 files changed, 70 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d4b4756e..1ee11601 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## Version 1.0.4
+
+Make more options for querying
+
+- There is now a `~=` query operator that can utalise the `.ilike` filter to
+  allow non-case-sensitive filering with wildcards (i.e., the `%` character).
+- `dregs ls` can now filter on the dataset name, including `%` wildcards, using
+  the `--name` option.
+- `dregs_ls` can return arbitrary columns using the `--return_cols` option
+
 ## Version 1.0.3
 
 Some changes to the way the `relative_path` is automatically generated from the
diff --git a/src/dataregistry_cli/cli.py b/src/dataregistry_cli/cli.py
index be82b15c..034cfda2 100644
--- a/src/dataregistry_cli/cli.py
+++ b/src/dataregistry_cli/cli.py
@@ -77,10 +77,10 @@ def get_parser():
         help="List datasets for a given owner type",
         choices=["user", "group", "production", "project"],
     )
+    arg_ls.add_argument("--name", help="Only return datasets with a given name (% can be used as a wildcard)")
     arg_ls.add_argument("--all", help="List all datasets", action="store_true")
     arg_ls.add_argument(
-        "--extended", help="List more properties than the default", action="store_true"
-    )
+        "--return_cols", help="List of columns to return in the query", nargs="+", type=str)
     arg_ls.add_argument(
         "--max_rows",
         help="Maximum number of rows to print (default 500)",
diff --git a/src/dataregistry_cli/query.py b/src/dataregistry_cli/query.py
index bafd755c..2018adee 100644
--- a/src/dataregistry_cli/query.py
+++ b/src/dataregistry_cli/query.py
@@ -1,6 +1,52 @@
 import os
 from dataregistry import DataRegistry
 import pandas as pd
+from dataregistry import Filter
+
+
+def _render_filters(datareg, args):
+    """
+    Apply a filter to the query.
+
+    Both the owner and owner_type columns can be filtered against. In addition,
+    the dataset name column can be filtered against (allowed for % wildcards).
+
+    Keywords can also be filtered against. These have to be treated separately,
+    as they need to be linked to the keywords table.
+
+    Parameters
+    ----------
+    datareg : DataRegistry object
+    args : argparse object
+
+    Returns
+    -------
+    filters : list[Filter]
+    """
+
+    filters = []
+
+    # Dataset columns we can filter by
+    queriables = ["owner", "owner_type", "name"]
+
+    print("\nDataRegistry query:", end=" ")
+    if not args.all:
+        for col in queriables:
+            # Add filter on this column
+            if getattr(args, col) is not None:
+                if col == "name":
+                    filters.append(Filter(f"dataset.{col}", "~=", getattr(args, col)))
+                else:
+                    filters.append(Filter(f"dataset.{col}", "==", getattr(args, col)))
+                print(f"{col}=={getattr(args, col)}", end=" ")
+    else:
+        print("all datasets", end=" ")
+
+    # Add keywords filter
+    if args.keyword is not None:
+        filters.append(datareg.Query.gen_filter("keyword.keyword", "==", args.keyword))
+
+    return filters
 
 
 def dregs_ls(args):
@@ -20,6 +66,9 @@ def dregs_ls(args):
         Owner to list dataset entries for
     args.owner_type : str
         Owner type to list dataset entries for
+    args.name : str
+        Filter to only those results with a given dataset name (% can be used
+        as a wildcard)
     args.all : bool
         True to show all datasets, no filters
     args.config_file : str
@@ -30,8 +79,8 @@ def dregs_ls(args):
         Path to root_dir
     args.site : str
         Look up root_dir using a site
-    args.extended : bool
-        True to list more dataset properties
+    args.return_cols : list[str]
+        List of dataset columns to return
     args.max_chars : int
         Maximum number of character to print per column
     args.max_rows : int
@@ -59,28 +108,12 @@ def dregs_ls(args):
     else:
         datareg_prod = None
 
-    # Filter on dataset owner and/or owner_type
-    filters = []
+    # By default, search for "our" dataset
+    if args.owner is None:
+        args.owner = os.getenv("USER")
 
-    print("\nDataRegistry query:", end=" ")
-    if not args.all:
-        # Add owner_type filter
-        if args.owner_type is not None:
-            filters.append(Filter("dataset.owner_type", "==", args.owner_type))
-            print(f"owner_type=={args.owner_type}", end=" ")
-
-        # Add owner filter
-        if args.owner is None:
-            if args.owner_type is None:
-                filters.append(
-                    datareg.Query.gen_filter("dataset.owner", "==", os.getenv("USER"))
-                )
-                print(f"owner=={os.getenv('USER')}", end=" ")
-        else:
-            filters.append(datareg.Query.gen_filter("dataset.owner", "==", args.owner))
-            print(f"owner=={args.owner}", end=" ")
-    else:
-        print("all datasets", end=" ")
+    # Render search filters
+    filters = _render_filters(datareg, args)
 
     # What columns are we printing
     _print_cols = [
@@ -90,23 +123,11 @@ def dregs_ls(args):
         "dataset.owner_type",
         "dataset.description",
     ]
-    if args.extended:
-        _print_cols.extend(
-            [
-                "dataset.dataset_id",
-                "dataset.relative_path",
-                "dataset.status",
-                "dataset.register_date",
-                "dataset.is_overwritable",
-            ]
-        )
-
-    # Add keywords filter
+    if args.return_cols is not None:
+        _print_cols = [f"dataset.{x}" for x in args.return_cols]
     if args.keyword is not None:
         _print_cols.append("keyword.keyword")
 
-        filters.append(datareg.Query.gen_filter("keyword.keyword", "==", args.keyword))
-
     # Loop over this schema and the production schema and print the results
     for this_datareg in [datareg, datareg_prod]:
         if this_datareg is None:

From bed26bcdfffdc4bd7405fd2c273cb3a10960738d Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 20:23:41 +0100
Subject: [PATCH 03/12] Update version to 1.0.4

---
 src/dataregistry/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dataregistry/_version.py b/src/dataregistry/_version.py
index 976498ab..92192eed 100644
--- a/src/dataregistry/_version.py
+++ b/src/dataregistry/_version.py
@@ -1 +1 @@
-__version__ = "1.0.3"
+__version__ = "1.0.4"

From 3e03e68848a643d40dcb5778dfd5f18f5d238714 Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 20:43:19 +0100
Subject: [PATCH 04/12] Remove % from help string

---
 src/dataregistry_cli/cli.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/dataregistry_cli/cli.py b/src/dataregistry_cli/cli.py
index 034cfda2..17754340 100644
--- a/src/dataregistry_cli/cli.py
+++ b/src/dataregistry_cli/cli.py
@@ -77,10 +77,16 @@ def get_parser():
         help="List datasets for a given owner type",
         choices=["user", "group", "production", "project"],
     )
-    arg_ls.add_argument("--name", help="Only return datasets with a given name (% can be used as a wildcard)")
+    arg_ls.add_argument(
+        "--name", help="Only return datasets with a given name (wildcard support)"
+    )
     arg_ls.add_argument("--all", help="List all datasets", action="store_true")
     arg_ls.add_argument(
-        "--return_cols", help="List of columns to return in the query", nargs="+", type=str)
+        "--return_cols",
+        help="List of columns to return in the query",
+        nargs="+",
+        type=str,
+    )
     arg_ls.add_argument(
         "--max_rows",
         help="Maximum number of rows to print (default 500)",

From 66381207d3f0e5213084ca23d020d98bb55c72ab Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 20:54:22 +0100
Subject: [PATCH 05/12] Update docs

---
 docs/source/tutorial_cli.rst                  | 14 ++++++++++++--
 .../tutorial_notebooks/query_datasets.ipynb   | 19 +++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/docs/source/tutorial_cli.rst b/docs/source/tutorial_cli.rst
index c9f34ed4..bc67ca21 100644
--- a/docs/source/tutorial_cli.rst
+++ b/docs/source/tutorial_cli.rst
@@ -112,11 +112,21 @@ For example, to see all the datasets from the DESC Generic Working Group we woul
 
    dregs ls --owner "DESC Generic Working Group"
 
-To list entries from all owners type
+To list entries from all owners do ``--owner none``.
+
+You can search against the ``dataset.name`` column, with wildcard support, e.g.,
+
+.. code-block:: bash
+
+   dregs ls --name dataset:dc2:%
+
+will search for all datasets whose name starts with the pattern "dataset:dc2:".
+
+To select what columns are printed in the result use the ``--return_cols`` option, e.g.,
 
 .. code-block:: bash
 
-   dregs ls --all
+   dregs ls --return_cols dataset_id name description status
 
 Using ``dregs ls`` is a quick an easy way to remind yourself what names you
 gave to previous datasets, and what relative paths they reside at.
diff --git a/docs/source/tutorial_notebooks/query_datasets.ipynb b/docs/source/tutorial_notebooks/query_datasets.ipynb
index cd832aff..9ae576b5 100644
--- a/docs/source/tutorial_notebooks/query_datasets.ipynb
+++ b/docs/source/tutorial_notebooks/query_datasets.ipynb
@@ -116,6 +116,25 @@
     "\n",
     "The allowed boolean logic operators are: `==`, `!=`, `<`, `<=`, `>` and `>=`.\n",
     "\n",
+    "A special operator, `~=`, can be use to perform wildcard querties. This is particularly useful when only a partial dataset name is known, or when we want to return all datasets with a similar naming pattern, form example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "578951ce-cc30-4ab3-88b4-7bb98b734b9c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a filter that queries on the dataset name with a wildcard\n",
+    "f = datareg.Query.gen_filter('dataset.name', '~=', 'nersc_tutorial:%')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "37c23c39-7a78-4931-a281-226a7bfc9333",
+   "metadata": {},
+   "source": [
     "### Performing the query\n",
     "\n",
     "Now we can pass this filter through to a query using the `Query` extension of the `DataRegistry` class, e.g.,"

From b81e9152009de99f21d2694bbf2a0f7abb061b43 Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 20:56:21 +0100
Subject: [PATCH 06/12] Add support for searching all owners

---
 src/dataregistry_cli/query.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/dataregistry_cli/query.py b/src/dataregistry_cli/query.py
index 2018adee..b16eb68d 100644
--- a/src/dataregistry_cli/query.py
+++ b/src/dataregistry_cli/query.py
@@ -30,17 +30,15 @@ def _render_filters(datareg, args):
     queriables = ["owner", "owner_type", "name"]
 
     print("\nDataRegistry query:", end=" ")
-    if not args.all:
-        for col in queriables:
-            # Add filter on this column
-            if getattr(args, col) is not None:
-                if col == "name":
-                    filters.append(Filter(f"dataset.{col}", "~=", getattr(args, col)))
-                else:
+    for col in queriables:
+        # Add filter on this column
+        if getattr(args, col) is not None:
+            if col == "name":
+                filters.append(Filter(f"dataset.{col}", "~=", getattr(args, col)))
+            else:
+                if not (col == "owner" and getattr(args, col).lower() == "none"):
                     filters.append(Filter(f"dataset.{col}", "==", getattr(args, col)))
-                print(f"{col}=={getattr(args, col)}", end=" ")
-    else:
-        print("all datasets", end=" ")
+            print(f"{col}=={getattr(args, col)}", end=" ")
 
     # Add keywords filter
     if args.keyword is not None:

From 2421509541b0557eddf316506c46d39f97387e70 Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 29 Oct 2024 20:58:00 +0100
Subject: [PATCH 07/12] Get rid of all from cli

---
 src/dataregistry_cli/cli.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dataregistry_cli/cli.py b/src/dataregistry_cli/cli.py
index 17754340..ce6eb1e4 100644
--- a/src/dataregistry_cli/cli.py
+++ b/src/dataregistry_cli/cli.py
@@ -80,7 +80,6 @@ def get_parser():
     arg_ls.add_argument(
         "--name", help="Only return datasets with a given name (wildcard support)"
     )
-    arg_ls.add_argument("--all", help="List all datasets", action="store_true")
     arg_ls.add_argument(
         "--return_cols",
         help="List of columns to return in the query",

From 7627aa58c9882f1da58b6238216ec682d6f7ccab Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Fri, 1 Nov 2024 11:11:53 +0100
Subject: [PATCH 08/12] Add case-sensitive wildcard operator ~==

---
 docs/source/tutorial_cli.rst                  |  3 +-
 .../tutorial_notebooks/query_datasets.ipynb   | 10 ++++-
 src/dataregistry/query.py                     | 10 +++--
 tests/end_to_end_tests/test_query.py          | 38 +++++++++++--------
 4 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/docs/source/tutorial_cli.rst b/docs/source/tutorial_cli.rst
index bc67ca21..3a3bf736 100644
--- a/docs/source/tutorial_cli.rst
+++ b/docs/source/tutorial_cli.rst
@@ -120,7 +120,8 @@ You can search against the ``dataset.name`` column, with wildcard support, e.g.,
 
    dregs ls --name dataset:dc2:%
 
-will search for all datasets whose name starts with the pattern "dataset:dc2:".
+will search for all datasets whose name starts with the pattern "dataset:dc2:"
+(note the ``--name`` queries here are case insensitive).
 
 To select what columns are printed in the result use the ``--return_cols`` option, e.g.,
 
diff --git a/docs/source/tutorial_notebooks/query_datasets.ipynb b/docs/source/tutorial_notebooks/query_datasets.ipynb
index 9ae576b5..33ce6aec 100644
--- a/docs/source/tutorial_notebooks/query_datasets.ipynb
+++ b/docs/source/tutorial_notebooks/query_datasets.ipynb
@@ -116,7 +116,7 @@
     "\n",
     "The allowed boolean logic operators are: `==`, `!=`, `<`, `<=`, `>` and `>=`.\n",
     "\n",
-    "A special operator, `~=`, can be use to perform wildcard querties. This is particularly useful when only a partial dataset name is known, or when we want to return all datasets with a similar naming pattern, form example"
+    "A special operator, `~=`, can be use to perform wildcard querties. This is particularly useful when only a partial dataset name is known, or when we want to return all datasets with a similar naming pattern, for example"
    ]
   },
   {
@@ -130,6 +130,14 @@
     "f = datareg.Query.gen_filter('dataset.name', '~=', 'nersc_tutorial:%')"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "877e32d9-08b6-4121-8afa-07f3ed8f8524",
+   "metadata": {},
+   "source": [
+    "will return all datasets whose name begins with the pattern `nersc_tutorial:`. The `~=` operator is case insensitive, for case sensitive wildcard searching, one can use the `~==` operator."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "37c23c39-7a78-4931-a281-226a7bfc9333",
diff --git a/src/dataregistry/query.py b/src/dataregistry/query.py
index a2c56b25..34b1cba9 100644
--- a/src/dataregistry/query.py
+++ b/src/dataregistry/query.py
@@ -60,6 +60,7 @@
     ">": "__gt__",
     ">=": "__ge__",
     "~=": None,
+    "~==": None,
 }
 
 ALL_ORDERABLE = (
@@ -273,15 +274,18 @@ def _render_filter(self, f, stmt):
 
         # Extract the property we are ordering on (also making sure it
         # is orderable)
-        if not column_is_orderable[0] and f[1] not in ["~=", "==", "=", "!="]:
+        if not column_is_orderable[0] and f[1] not in ["~==", "~=", "==", "=", "!="]:
             raise ValueError('check_filter: Cannot apply "{f[1]}" to "{f[0]}"')
         else:
             value = f[2]
 
-        # Special case where we are partially matching with a wildcard
+        # Case insensitive wildcard matching
         if f[1] == "~=":
             return stmt.where(column_ref[0].ilike(value))
-        # General case 
+        # Case sensitive wildcard matching
+        elif f[1] == "~==":
+            return stmt.where(column_ref[0].like(value))
+        # General case using traditional boolean operator 
         else:
             return stmt.where(column_ref[0].__getattribute__(the_op)(value))
 
diff --git a/tests/end_to_end_tests/test_query.py b/tests/end_to_end_tests/test_query.py
index 3f9fb6f3..5dca69d8 100644
--- a/tests/end_to_end_tests/test_query.py
+++ b/tests/end_to_end_tests/test_query.py
@@ -1,3 +1,4 @@
+import pytest
 import os
 import pandas as pd
 import sqlalchemy
@@ -108,33 +109,38 @@ def test_query_between_columns(dummy_file):
             assert getattr(r, "dataset.name") == _NAME
             assert getattr(r, "dataset.version_string") == _V_STRING
 
-def test_query_name(dummy_file):
-    """Test a quering on a partial name'"""
+@pytest.mark.parametrize(
+    "op,qstr,ans,tag",
+    [
+        ("~=", "DESC:datasets:test_query_name_nocasewildcard%", 3, "nocasewildcard"),
+        ("==", "DESC:datasets:test_query_name_exactmatch_first", 1, "exactmatch"),
+        ("~==", "DESC:datasets:Test_Query_Name_nocasewildcard%", 0, "casewildcardfail"),
+        ("~==", "DESC:datasets:test_query_name_nocasewildcard%", 3, "casewildcardpass"),
+    ],
+)
+def test_query_name(dummy_file,op,qstr,ans,tag):
+    """Test a quering on a partial name with wildcards"""
 
     # Establish connection to database
     tmp_src_dir, tmp_root_dir = dummy_file
     datareg = DataRegistry(root_dir=str(tmp_root_dir), schema=DEFAULT_SCHEMA_WORKING)
 
     # Add entry
-    for tag in ["first", "second", "third"]:
+    for tmp_tag in ["first", "second", "third"]:
         d_id = _insert_dataset_entry(
             datareg,
-            f"DESC:datasets:test_query_name_{tag}",
+            f"DESC:datasets:test_query_name_{tag}_{tmp_tag}",
             "0.0.1",
         )
 
     # Do a wildcard search on the name
-    f = datareg.Query.gen_filter("dataset.name", "~=", "DESC:datasets:test_query_name%")
+    f = datareg.Query.gen_filter("dataset.name", op, qstr)
     results = datareg.Query.find_datasets(property_names=None, filters=[f])
 
-    # Should return all three of our datasets
-    for c, v in results.items():
-        assert len(v) == 3
-
-    # Do an exact search on the name
-    f = datareg.Query.gen_filter("dataset.name", "==", "DESC:datasets:test_query_name_first")
-    results = datareg.Query.find_datasets(property_names=None, filters=[f])
-
-    # Should return just one dataset
-    for c, v in results.items():
-        assert len(v) == 1
+    # How many datasets did we find
+    if ans == 0:
+        assert len(results) == 0
+    else:
+        assert len(results) > 0
+        for c, v in results.items():
+            assert len(v) == ans

From 5012a1f31e3a5e8e9a7e7f9e57a192e75d68285f Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Fri, 1 Nov 2024 11:33:12 +0100
Subject: [PATCH 09/12] Change query wildcard to * over %

---
 docs/source/tutorial_cli.rst                        | 5 +++--
 docs/source/tutorial_notebooks/query_datasets.ipynb | 4 ++--
 src/dataregistry/query.py                           | 8 +++++---
 tests/end_to_end_tests/test_query.py                | 9 +++++----
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/docs/source/tutorial_cli.rst b/docs/source/tutorial_cli.rst
index 3a3bf736..af7c5394 100644
--- a/docs/source/tutorial_cli.rst
+++ b/docs/source/tutorial_cli.rst
@@ -114,11 +114,12 @@ For example, to see all the datasets from the DESC Generic Working Group we woul
 
 To list entries from all owners do ``--owner none``.
 
-You can search against the ``dataset.name`` column, with wildcard support, e.g.,
+You can search against the ``dataset.name`` column, with wildcard support,
+where ``*`` is the wildcard character, e.g.,
 
 .. code-block:: bash
 
-   dregs ls --name dataset:dc2:%
+   dregs ls --name dataset:dc2:*
 
 will search for all datasets whose name starts with the pattern "dataset:dc2:"
 (note the ``--name`` queries here are case insensitive).
diff --git a/docs/source/tutorial_notebooks/query_datasets.ipynb b/docs/source/tutorial_notebooks/query_datasets.ipynb
index 33ce6aec..5af3cb8f 100644
--- a/docs/source/tutorial_notebooks/query_datasets.ipynb
+++ b/docs/source/tutorial_notebooks/query_datasets.ipynb
@@ -116,7 +116,7 @@
     "\n",
     "The allowed boolean logic operators are: `==`, `!=`, `<`, `<=`, `>` and `>=`.\n",
     "\n",
-    "A special operator, `~=`, can be use to perform wildcard querties. This is particularly useful when only a partial dataset name is known, or when we want to return all datasets with a similar naming pattern, for example"
+    "A special operator, `~=`, can be use to perform wildcard querties, where `*` is the wildcard character. This is particularly useful when only a partial dataset name is known, or when we want to return all datasets with a similar naming pattern, for example"
    ]
   },
   {
@@ -127,7 +127,7 @@
    "outputs": [],
    "source": [
     "# Create a filter that queries on the dataset name with a wildcard\n",
-    "f = datareg.Query.gen_filter('dataset.name', '~=', 'nersc_tutorial:%')"
+    "f = datareg.Query.gen_filter('dataset.name', '~=', 'nersc_tutorial:*')"
    ]
   },
   {
diff --git a/src/dataregistry/query.py b/src/dataregistry/query.py
index 34b1cba9..4fe293ba 100644
--- a/src/dataregistry/query.py
+++ b/src/dataregistry/query.py
@@ -279,11 +279,13 @@ def _render_filter(self, f, stmt):
         else:
             value = f[2]
 
-        # Case insensitive wildcard matching
+        # Case insensitive wildcard matching (wildcard is '*')
         if f[1] == "~=":
-            return stmt.where(column_ref[0].ilike(value))
-        # Case sensitive wildcard matching
+            tmp = value.replace('%', r'\%').replace('_', r'\_').replace('*', '%')
+            return stmt.where(column_ref[0].ilike(tmp))
+        # Case sensitive wildcard matching (wildcard is '*')
         elif f[1] == "~==":
+            tmp = value.replace('%', r'\%').replace('_', r'\_').replace('*', '%')
             return stmt.where(column_ref[0].like(value))
         # General case using traditional boolean operator 
         else:
diff --git a/tests/end_to_end_tests/test_query.py b/tests/end_to_end_tests/test_query.py
index 5dca69d8..386c864d 100644
--- a/tests/end_to_end_tests/test_query.py
+++ b/tests/end_to_end_tests/test_query.py
@@ -109,16 +109,17 @@ def test_query_between_columns(dummy_file):
             assert getattr(r, "dataset.name") == _NAME
             assert getattr(r, "dataset.version_string") == _V_STRING
 
+
 @pytest.mark.parametrize(
     "op,qstr,ans,tag",
     [
-        ("~=", "DESC:datasets:test_query_name_nocasewildcard%", 3, "nocasewildcard"),
+        ("~=", "DESC:datasets:test_query_name_nocasewildcard*", 3, "nocasewildcard"),
         ("==", "DESC:datasets:test_query_name_exactmatch_first", 1, "exactmatch"),
-        ("~==", "DESC:datasets:Test_Query_Name_nocasewildcard%", 0, "casewildcardfail"),
-        ("~==", "DESC:datasets:test_query_name_nocasewildcard%", 3, "casewildcardpass"),
+        ("~==", "DESC:datasets:Test_Query_Name_nocasewildcard*", 0, "casewildcardfail"),
+        ("~==", "DESC:datasets:test_query_name_nocasewildcard*", 3, "casewildcardpass"),
     ],
 )
-def test_query_name(dummy_file,op,qstr,ans,tag):
+def test_query_name(dummy_file, op, qstr, ans, tag):
     """Test a quering on a partial name with wildcards"""
 
     # Establish connection to database

From 47ef94e83072f738f539e3acfd092698eb0fa8ed Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Fri, 1 Nov 2024 11:38:43 +0100
Subject: [PATCH 10/12] Fix bug

---
 src/dataregistry/query.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dataregistry/query.py b/src/dataregistry/query.py
index 4fe293ba..a054289b 100644
--- a/src/dataregistry/query.py
+++ b/src/dataregistry/query.py
@@ -286,7 +286,7 @@ def _render_filter(self, f, stmt):
         # Case sensitive wildcard matching (wildcard is '*')
         elif f[1] == "~==":
             tmp = value.replace('%', r'\%').replace('_', r'\_').replace('*', '%')
-            return stmt.where(column_ref[0].like(value))
+            return stmt.where(column_ref[0].like(tmp))
         # General case using traditional boolean operator 
         else:
             return stmt.where(column_ref[0].__getattribute__(the_op)(value))

From a99b3ba177610f4053be9fae143e03cf194df2b7 Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Fri, 1 Nov 2024 11:51:14 +0100
Subject: [PATCH 11/12] Fix query test for sqlite

---
 tests/end_to_end_tests/test_query.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/end_to_end_tests/test_query.py b/tests/end_to_end_tests/test_query.py
index 386c864d..2614f75b 100644
--- a/tests/end_to_end_tests/test_query.py
+++ b/tests/end_to_end_tests/test_query.py
@@ -109,7 +109,9 @@ def test_query_between_columns(dummy_file):
             assert getattr(r, "dataset.name") == _NAME
             assert getattr(r, "dataset.version_string") == _V_STRING
 
-
+@pytest.mark.skipif(
+    datareg.db_connection._dialect == "sqlite", reason="wildcards break for sqlite"
+)
 @pytest.mark.parametrize(
     "op,qstr,ans,tag",
     [

From 264ebfb966cf706a6809ca6a8012da49dcbb1a23 Mon Sep 17 00:00:00 2001
From: Stuart McAlpine <stuart.mcalpine@fysik.su.se>
Date: Tue, 12 Nov 2024 14:41:55 +0100
Subject: [PATCH 12/12] Restrict ~= to only work on certain columns

---
 src/dataregistry/query.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/dataregistry/query.py b/src/dataregistry/query.py
index a054289b..70fd965a 100644
--- a/src/dataregistry/query.py
+++ b/src/dataregistry/query.py
@@ -76,6 +76,12 @@
     .union(LITE_TYPES)
 )
 
+ILIKE_ALLOWED = [
+    "dataset.name",
+    "dataset.owner",
+    "dataset.relative_path",
+    "dataset.access_api"
+]
 
 def is_orderable_type(ctype):
     return type(ctype) in ALL_ORDERABLE
@@ -279,14 +285,20 @@ def _render_filter(self, f, stmt):
         else:
             value = f[2]
 
-        # Case insensitive wildcard matching (wildcard is '*')
-        if f[1] == "~=":
-            tmp = value.replace('%', r'\%').replace('_', r'\_').replace('*', '%')
-            return stmt.where(column_ref[0].ilike(tmp))
-        # Case sensitive wildcard matching (wildcard is '*')
-        elif f[1] == "~==":
+        # String partial matching with wildcard
+        if f[1] in ["~=", "~=="]:
+            if f[0] not in ILIKE_ALLOWED:
+                raise ValueError(f"Can only perform ~= search on {ILIKE_ALLOWED}")
+
             tmp = value.replace('%', r'\%').replace('_', r'\_').replace('*', '%')
-            return stmt.where(column_ref[0].like(tmp))
+
+            # Case insensitive wildcard matching (wildcard is '*')
+            if f[1] == "~=":
+                return stmt.where(column_ref[0].ilike(tmp))
+            # Case sensitive wildcard matching (wildcard is '*')
+            else:
+                return stmt.where(column_ref[0].like(tmp))
+
         # General case using traditional boolean operator 
         else:
             return stmt.where(column_ref[0].__getattribute__(the_op)(value))