From f4e1fa94f6b8385092eaddc721d6dfe9799f9a68 Mon Sep 17 00:00:00 2001
From: Sameer Sheorey <41028320+ssheorey@users.noreply.github.com>
Date: Wed, 18 Sep 2024 10:27:45 -0700
Subject: [PATCH 1/4] Add remote visualizer example. Allow changing rpc address
 in draw. (#6973)

---
 .../python/visualization/remote_visualizer.py | 71 +++++++++++++++++++
 .../visualization/_external_visualizer.py     |  7 ++
 python/open3d/visualization/draw.py           | 11 +--
 3 files changed, 85 insertions(+), 4 deletions(-)
 create mode 100644 examples/python/visualization/remote_visualizer.py

diff --git a/examples/python/visualization/remote_visualizer.py b/examples/python/visualization/remote_visualizer.py
new file mode 100644
index 00000000000..e360e55272b
--- /dev/null
+++ b/examples/python/visualization/remote_visualizer.py
@@ -0,0 +1,71 @@
+# ----------------------------------------------------------------------------
+# -                        Open3D: www.open3d.org                            -
+# ----------------------------------------------------------------------------
+# Copyright (c) 2018-2023 www.open3d.org
+# SPDX-License-Identifier: MIT
+# ----------------------------------------------------------------------------
+"""This example shows Open3D's remote visualization feature using RPC
+communication. To run this example, start the client first by running
+
+python remote_visualizer.py client
+
+and then run the server by running
+
+python remote_visualizer.py server
+
+Port 51454 is used by default for communication. For remote visualization (client
+and server running on different machines), use ssh to forward the remote server
+port to your local computer:
+
+    ssh -N -R 51454:localhost:51454 user@remote_host
+
+See documentation for more details (e.g. to use a different port).
+"""
+import sys
+import numpy as np
+import open3d as o3d
+import open3d.visualization as vis
+
+
+def make_point_cloud(npts, center, radius, colorize):
+    pts = np.random.uniform(-radius, radius, size=[npts, 3]) + center
+    cloud = o3d.geometry.PointCloud()
+    cloud.points = o3d.utility.Vector3dVector(pts)
+    if colorize:
+        colors = np.random.uniform(0.0, 1.0, size=[npts, 3])
+        cloud.colors = o3d.utility.Vector3dVector(colors)
+    return cloud
+
+
+def server_time_animation():
+    orig = make_point_cloud(200, (0, 0, 0), 1.0, True)
+    clouds = [{"name": "t=0", "geometry": orig, "time": 0}]
+    drift_dir = (1.0, 0.0, 0.0)
+    expand = 1.0
+    n = 20
+    ev = o3d.visualization.ExternalVisualizer()
+    for i in range(1, n):
+        amount = float(i) / float(n - 1)
+        cloud = o3d.geometry.PointCloud()
+        pts = np.asarray(orig.points)
+        pts = pts * (1.0 + amount * expand) + [amount * v for v in drift_dir]
+        cloud.points = o3d.utility.Vector3dVector(pts)
+        cloud.colors = orig.colors
+        ev.set(obj=cloud, time=i, path=f"points at t={i}")
+        print('.', end='', flush=True)
+    print()
+
+
+def client_time_animation():
+    o3d.visualization.draw(title="Open3D - Remote Visualizer Client",
+                           show_ui=True,
+                           rpc_interface=True)
+
+
+if __name__ == "__main__":
+    assert len(sys.argv) == 2 and sys.argv[1] in ('client', 'server'), (
+        "Usage: python remote_visualizer.py [client|server]")
+    if sys.argv[1] == "client":
+        client_time_animation()
+    elif sys.argv[1] == "server":
+        server_time_animation()
diff --git a/python/open3d/visualization/_external_visualizer.py b/python/open3d/visualization/_external_visualizer.py
index ac7209127f0..774ddbaebe1 100644
--- a/python/open3d/visualization/_external_visualizer.py
+++ b/python/open3d/visualization/_external_visualizer.py
@@ -37,17 +37,23 @@ def set(self, obj=None, path='', time=0, layer='', connection=None):
 
         Example:
             To quickly send a single object just write::
+
                 ev.set(point_cloud)
 
             To place the object at a specific location in the scene tree do::
+
                 ev.set(point_cloud, path='group/mypoints', time=42, layer='')
+
             Note that depending on the visualizer some arguments like time or
             layer may not be supported and will be ignored.
 
             To set multiple objects use a list to pass multiple objects::
+
                 ev.set([point_cloud, mesh, camera])
+
             Each entry in the list can be a tuple specifying all or some of the
             location parameters::
+
                 ev.set(objs=[(point_cloud,'group/mypoints', 1, 'layer1'),
                              (mesh, 'group/mymesh'),
                              camera
@@ -147,6 +153,7 @@ def draw(self, geometry=None, *args, **kwargs):
 
         Example:
             Here we use draw with the default external visualizer::
+
                 import open3d as o3d
 
                 torus = o3d.geometry.TriangleMesh.create_torus()
diff --git a/python/open3d/visualization/draw.py b/python/open3d/visualization/draw.py
index f7764b41400..aa6e29e2b2c 100644
--- a/python/open3d/visualization/draw.py
+++ b/python/open3d/visualization/draw.py
@@ -83,9 +83,10 @@ def draw(geometry=None,
         animation_time_step (float): Duration in seconds for each animation
             frame.
         animation_duration (float): Total animation duration in seconds.
-        rpc_interface (bool): Start an RPC interface at http://localhost:51454 and
-            listen for drawing requests. The requests can be made with
-            :class:`open3d.visualization.ExternalVisualizer`.
+        rpc_interface (bool or str): Start an RPC interface at this local
+            address and listen for drawing requests. If rpc_interface is True, the
+            default address "tcp://localhost:51454" is used. The requests can be
+            made with :class:`open3d.visualization.ExternalVisualizer`.
         on_init (Callable): Extra initialization procedure for the underlying
             GUI window. The procedure receives a single argument of type
             :class:`open3d.visualization.O3DVisualizer`.
@@ -202,7 +203,9 @@ def add(g, n):
         w.show_skybox(show_skybox)
 
     if rpc_interface:
-        w.start_rpc_interface(address="tcp://127.0.0.1:51454", timeout=10000)
+        if not isinstance(rpc_interface, str):
+            rpc_interface = "tcp://127.0.0.1:51454"
+        w.start_rpc_interface(address=rpc_interface, timeout=10000)
 
         def stop_rpc():
             w.stop_rpc_interface()

From e88c7b13270d961393ab1e64c03d1f7ccd047cce Mon Sep 17 00:00:00 2001
From: Sameer Sheorey <41028320+ssheorey@users.noreply.github.com>
Date: Sun, 29 Sep 2024 23:07:07 -0700
Subject: [PATCH 2/4] Replace conda with pyenv to fix incorrect libstdc++ use
 in jammy CI. (#6966)

* Replace conda with pyenv to fix incorrect libstdc++ use in jammy CI.
* Use RPATH instead of RUNPATH to load libc++abi.so directly in Python. No need to find and load explicitly.
* Use libc++11 to build in Ubuntu 22.04. Warn if newer version is used.
* TODO: Solution for Ubuntu 24.04
---
 3rdparty/find_dependencies.cmake    |  9 ++++++
 cpp/open3d/core/nns/NanoFlannImpl.h | 25 ++++------------
 cpp/pybind/CMakeLists.txt           |  3 ++
 cpp/tests/t/geometry/TensorMap.cpp  |  7 +++++
 docker/Dockerfile.ci                | 44 +++++++++++++++++++----------
 docker/docker_test.sh               |  2 +-
 python/open3d/__init__.py           |  8 ------
 util/install_deps_ubuntu.sh         | 11 ++++++--
 8 files changed, 63 insertions(+), 46 deletions(-)

diff --git a/3rdparty/find_dependencies.cmake b/3rdparty/find_dependencies.cmake
index 77b7085df69..d91377a3138 100644
--- a/3rdparty/find_dependencies.cmake
+++ b/3rdparty/find_dependencies.cmake
@@ -1338,6 +1338,7 @@ if(BUILD_GUI)
                     if (CPP_LIBRARY AND CPPABI_LIBRARY)
                         set(CLANG_LIBDIR ${llvm_lib_dir})
                         message(STATUS "CLANG_LIBDIR found in ubuntu-default: ${CLANG_LIBDIR}")
+                        set(LIBCPP_VERSION ${llvm_ver})
                         break()
                     endif()
                 endforeach()
@@ -1362,7 +1363,10 @@ if(BUILD_GUI)
                              llvm-8/lib
                              llvm-7/lib
                 )
+                file(REAL_PATH ${CPPABI_LIBRARY} CPPABI_LIBRARY)
                 get_filename_component(CLANG_LIBDIR ${CPPABI_LIBRARY} DIRECTORY)
+                string(REGEX MATCH "llvm-([0-9]+)/lib" _ ${CLANG_LIBDIR})
+                set(LIBCPP_VERSION ${CMAKE_MATCH_1})
             endif()
 
             # Find clang libraries at the exact path ${CLANG_LIBDIR}.
@@ -1378,6 +1382,11 @@ if(BUILD_GUI)
             target_link_libraries(3rdparty_filament INTERFACE -lstdc++
                                   ${CPP_LIBRARY} ${CPPABI_LIBRARY})
             message(STATUS "Filament C++ libraries: ${CPP_LIBRARY} ${CPPABI_LIBRARY}")
+            if (LIBCPP_VERSION GREATER 11)
+                message(WARNING "libc++ (LLVM) version ${LIBCPP_VERSION} > 11 includes libunwind that " 
+                "interferes with the system libunwind.so.8 and may crash Python code when exceptions "
+                "are used. Please consider using libc++ (LLVM) v11.")
+            endif()
         endif()
         if (APPLE)
             find_library(CORE_VIDEO CoreVideo)
diff --git a/cpp/open3d/core/nns/NanoFlannImpl.h b/cpp/open3d/core/nns/NanoFlannImpl.h
index 027d6d0c4ee..9a9ceb100c4 100644
--- a/cpp/open3d/core/nns/NanoFlannImpl.h
+++ b/cpp/open3d/core/nns/NanoFlannImpl.h
@@ -118,13 +118,6 @@ void _KnnSearchCPU(NanoFlannIndexHolderBase *holder,
         return;
     }
 
-    auto points_equal = [](const T *const p1, const T *const p2,
-                           size_t dimension) {
-        std::vector<T> p1_vec(p1, p1 + dimension);
-        std::vector<T> p2_vec(p2, p2 + dimension);
-        return p1_vec == p2_vec;
-    };
-
     std::vector<std::vector<TIndex>> neighbors_indices(num_queries);
     std::vector<std::vector<T>> neighbors_distances(num_queries);
     std::vector<uint32_t> neighbors_count(num_queries, 0);
@@ -147,8 +140,9 @@ void _KnnSearchCPU(NanoFlannIndexHolderBase *holder,
                     for (size_t valid_i = 0; valid_i < num_valid; ++valid_i) {
                         TIndex idx = result_indices[valid_i];
                         if (ignore_query_point &&
-                            points_equal(&queries[i * dimension],
-                                         &points[idx * dimension], dimension)) {
+                            std::equal(&queries[i * dimension],
+                                       &queries[i * dimension] + dimension,
+                                       &points[idx * dimension])) {
                             continue;
                         }
                         neighbors_indices[i].push_back(idx);
@@ -222,13 +216,6 @@ void _RadiusSearchCPU(NanoFlannIndexHolderBase *holder,
         return;
     }
 
-    auto points_equal = [](const T *const p1, const T *const p2,
-                           size_t dimension) {
-        std::vector<T> p1_vec(p1, p1 + dimension);
-        std::vector<T> p2_vec(p2, p2 + dimension);
-        return p1_vec == p2_vec;
-    };
-
     std::vector<std::vector<TIndex>> neighbors_indices(num_queries);
     std::vector<std::vector<T>> neighbors_distances(num_queries);
     std::vector<uint32_t> neighbors_count(num_queries, 0);
@@ -255,9 +242,9 @@ void _RadiusSearchCPU(NanoFlannIndexHolderBase *holder,
                     int num_neighbors = 0;
                     for (const auto &idx_dist : search_result) {
                         if (ignore_query_point &&
-                            points_equal(&queries[i * dimension],
-                                         &points[idx_dist.first * dimension],
-                                         dimension)) {
+                            std::equal(&queries[i * dimension],
+                                       &queries[i * dimension] + dimension,
+                                       &points[idx_dist.first * dimension])) {
                             continue;
                         }
                         neighbors_indices[i].push_back(idx_dist.first);
diff --git a/cpp/pybind/CMakeLists.txt b/cpp/pybind/CMakeLists.txt
index e7a534a3eb3..c79bbd96719 100644
--- a/cpp/pybind/CMakeLists.txt
+++ b/cpp/pybind/CMakeLists.txt
@@ -80,6 +80,9 @@ set(PYTHON_COMPILED_MODULE_DIR
 if (APPLE)
     set_target_properties(pybind PROPERTIES BUILD_RPATH "@loader_path;@loader_path/..")
 elseif (UNIX)
+    # Use RPATH instead of RUNPATH in pybind so that needed libc++.so can find child dependant libc++abi.so in RPATH
+    # https://stackoverflow.com/questions/69662319/managing-secondary-dependencies-of-shared-libraries
+    target_link_options(pybind PRIVATE "LINKER:--disable-new-dtags")
     set_target_properties(pybind PROPERTIES BUILD_RPATH "$ORIGIN;$ORIGIN/..")
 endif()
 set_target_properties(pybind PROPERTIES
diff --git a/cpp/tests/t/geometry/TensorMap.cpp b/cpp/tests/t/geometry/TensorMap.cpp
index 372c6b82270..8512ce68ccb 100644
--- a/cpp/tests/t/geometry/TensorMap.cpp
+++ b/cpp/tests/t/geometry/TensorMap.cpp
@@ -32,6 +32,13 @@ TEST_P(TensorMapPermuteDevices, Constructor) {
     // Primary key is required.
     EXPECT_ANY_THROW(t::geometry::TensorMap());
 
+    // Delete primary key.
+    EXPECT_ANY_THROW(tm0.Erase("positions"));
+
+    // Reserved keys.
+    EXPECT_ANY_THROW(tm0.insert(
+            {"primary_key", core::Tensor::Zeros({2, 3}, dtype, device)}));
+
     // Iterators.
     std::map<std::string, core::Tensor> tensor_map(
             {{"positions", core::Tensor::Zeros({10, 3}, dtype, device)},
diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci
index cd52a838882..73feb7bffd6 100644
--- a/docker/Dockerfile.ci
+++ b/docker/Dockerfile.ci
@@ -67,34 +67,48 @@ RUN if [ "${BUILD_SYCL_MODULE}" = "ON" ]; then \
         rm -rf /etc/apt/sources.list.d/oneAPI.list; \
     fi
 
-# Dependencies: basic
+# Dependencies: basic and python-build
 RUN apt-get update && apt-get install -y \
     git  \
     wget \
     curl \
     build-essential \
     pkg-config \
+    zlib1g \
+    zlib1g-dev \
+    libssl-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
  && rm -rf /var/lib/apt/lists/*
 
-# Miniconda or Intel conda
-# The **/open3d/bin paths are used during docker run, in this way docker run
+# pyenv or Intel Python
+# The pyenv python paths are used during docker run, in this way docker run
 # does not need to activate the environment again.
-ENV PATH="/root/miniconda3/bin:${PATH}"
-ENV PATH="/root/miniconda3/envs/open3d/bin:${PATH}"
+# The soft link from the python patch level version to the python mino version
+# ensures python wheel commands (i.e. open3d) are in PATH, since we don't know
+# which patch level pyenv will install (latest).
+ENV PYENV_ROOT=/root/.pyenv
+ENV PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PYENV_ROOT/versions/$PYTHON_VERSION/bin:$PATH"
 ENV PATH="/opt/intel/oneapi/intelpython/latest/bin:${PATH}"
-ENV PATH="/opt/intel/oneapi/intelpython/latest/envs/open3d/bin:${PATH}"
 RUN if [ "${BUILD_SYCL_MODULE}" = "OFF" ]; then \
-        wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh; \
-        bash Miniconda3-latest-Linux-x86_64.sh -b; \
-        rm Miniconda3-latest-Linux-x86_64.sh; \
+        curl https://pyenv.run | bash \
+        && pyenv update \
+        && pyenv install $PYTHON_VERSION \
+        && pyenv global $PYTHON_VERSION \
+        && pyenv rehash \
+        && ln -s $PYENV_ROOT/versions/${PYTHON_VERSION}* $PYENV_ROOT/versions/${PYTHON_VERSION}; \
     fi
-RUN conda --version \
- && conda create -y -n open3d python=${PYTHON_VERSION}
+RUN python --version && pip --version
 
-# Activate open3d virtualenv
-# This works during docker build. It becomes the prefix of all RUN commands.
-# Ref: https://stackoverflow.com/a/60148365/1255535
-SHELL ["conda", "run", "-n", "open3d", "/bin/bash", "-o", "pipefail", "-c"]
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 # Dependencies: cmake
 ENV PATH=${HOME}/${CMAKE_VERSION}/bin:${PATH}
diff --git a/docker/docker_test.sh b/docker/docker_test.sh
index 441b17ed4e3..d328d14535d 100755
--- a/docker/docker_test.sh
+++ b/docker/docker_test.sh
@@ -141,7 +141,7 @@ cpp_python_linking_uninstall_test() {
     # Python test
     echo "pytest is randomized, add --randomly-seed=SEED to repeat the test sequence."
     ${docker_run} -i --rm "${DOCKER_TAG}" /bin/bash -c " \
-        python -m pytest python/test ${pytest_args} -s"
+        python  -W default -m pytest python/test ${pytest_args} -s"
     restart_docker_daemon_if_on_gcloud
 
     # Command-line tools test
diff --git a/python/open3d/__init__.py b/python/open3d/__init__.py
index 141bf426c50..f7354b2a90c 100644
--- a/python/open3d/__init__.py
+++ b/python/open3d/__init__.py
@@ -45,14 +45,6 @@ def load_cdll(path):
 if sys.platform == "win32":  # Unix: Use rpath to find libraries
     _win32_dll_dir = os.add_dll_directory(str(Path(__file__).parent))
 
-if _build_config["BUILD_GUI"] and not (find_library("c++abi") or
-                                       find_library("c++")):
-    try:  # Preload libc++.so and libc++abi.so (required by filament)
-        load_cdll(str(next((Path(__file__).parent).glob("*c++abi.*"))))
-        load_cdll(str(next((Path(__file__).parent).glob("*c++.*"))))
-    except StopIteration:  # Not found: check system paths while loading
-        pass
-
 __DEVICE_API__ = "cpu"
 if _build_config["BUILD_CUDA_MODULE"]:
     # Load CPU pybind dll gracefully without introducing new python variable.
diff --git a/util/install_deps_ubuntu.sh b/util/install_deps_ubuntu.sh
index 3e359a1a6e7..2786b19c23b 100755
--- a/util/install_deps_ubuntu.sh
+++ b/util/install_deps_ubuntu.sh
@@ -39,17 +39,22 @@ eval $(
     echo DISTRIB_ID="$DISTRIB_ID";
     echo DISTRIB_RELEASE="$DISTRIB_RELEASE"
 )
+# To avoid dependence on libunwind, we don't want to use clang / libc++ versions later than 11.
+# Ubuntu 20.04's has versions 8, 10 or 12 while Ubuntu 22.04 has versions 11 and later.
 if [ "$DISTRIB_ID" == "Ubuntu" -a "$DISTRIB_RELEASE" == "20.04" ]; then
-    # Ubuntu 20.04's clang/libc++-dev/libc++abi-dev are version 8, 10 or 12.
-    # To avoid dependence on libunwind, we don't want to use versions later than 10.
     deps=("${deps[@]/clang/clang-10}")
     deps=("${deps[@]/libc++-dev/libc++-10-dev}")
     deps=("${deps[@]/libc++abi-dev/libc++abi-10-dev}")
 fi
+if [ "$DISTRIB_ID" == "Ubuntu" -a "$DISTRIB_RELEASE" == "22.04" ]; then
+    deps=("${deps[@]/clang/clang-11}")
+    deps=("${deps[@]/libc++-dev/libc++-11-dev}")
+    deps=("${deps[@]/libc++abi-dev/libc++abi-11-dev}")
+fi
 
 # Special case for ARM64
 if [ "$(uname -m)" == "aarch64" ]; then
-    # For compling LAPACK in OpenBLAS
+    # For compiling LAPACK in OpenBLAS
     deps+=("gfortran")
 fi
 

From dd0d35956e1f9744563c5e0631474a243c9d861d Mon Sep 17 00:00:00 2001
From: Sameer Sheorey <41028320+ssheorey@users.noreply.github.com>
Date: Tue, 1 Oct 2024 13:31:47 -0700
Subject: [PATCH 3/4] jinja2 CVE fix (#6992)

https://github.com/isl-org/Open3D/security/dependabot/6
---
 cpp/pybind/t/geometry/boundingvolume.cpp | 48 ++++--------------------
 docs/make_docs.py                        |  6 +--
 docs/requirements.txt                    |  5 ++-
 3 files changed, 14 insertions(+), 45 deletions(-)

diff --git a/cpp/pybind/t/geometry/boundingvolume.cpp b/cpp/pybind/t/geometry/boundingvolume.cpp
index 981b8f3dd46..8e4494d1b79 100644
--- a/cpp/pybind/t/geometry/boundingvolume.cpp
+++ b/cpp/pybind/t/geometry/boundingvolume.cpp
@@ -22,51 +22,19 @@ void pybind_boundingvolume_declarations(py::module& m) {
                std::shared_ptr<AxisAlignedBoundingBox>, Geometry,
                DrawableGeometry>
             aabb(m, "AxisAlignedBoundingBox",
-                 R"(A bounding box that is aligned along the coordinate axes
-and defined by the min_bound and max_bound."
-- (min_bound, max_bound): Lower and upper bounds of the bounding box for all
-axes.
-    - Usage
-        - AxisAlignedBoundingBox::GetMinBound()
-        - AxisAlignedBoundingBox::SetMinBound(const core::Tensor &min_bound)
-        - AxisAlignedBoundingBox::GetMaxBound()
-        - AxisAlignedBoundingBox::SetMaxBound(const core::Tensor &max_bound)
-    - Value tensor must have shape {3,}.
-    - Value tensor must have the same data type and device.
-    - Value tensor can only be float32 (default) or float64.
-    - The device of the tensor determines the device of the box.
+                 R"(A bounding box that is aligned along the coordinate axes and
+has the properties:
 
-- color: Color of the bounding box.
-    - Usage
-        - AxisAlignedBoundingBox::GetColor()
-        - AxisAlignedBoundingBox::SetColor(const core::Tensor &color)
-    - Value tensor must have shape {3,}.
-    - Value tensor can only be float32 (default) or float64.
-    - Value tensor can only be range [0.0, 1.0].)");
+- (``min_bound``, ``max_bound``): Lower and upper bounds of the bounding box for all axes. These are tensors with shape (3,) and a common data type and device. The data type can only be ``open3d.core.float32`` (default) or ``open3d.core.float64``. The device of the tensor determines the device of the box.
+- ``color``: Color of the bounding box is a tensor with shape (3,) and a data type ``open3d.core.float32`` (default) or ``open3d.core.float64``. Values can only be in the range [0.0, 1.0].)");
     py::class_<OrientedBoundingBox, PyGeometry<OrientedBoundingBox>,
                std::shared_ptr<OrientedBoundingBox>, Geometry, DrawableGeometry>
             obb(m, "OrientedBoundingBox",
-                R"(A bounding box oriented along an arbitrary frame of reference.
-- (center, rotation, extent): The oriented bounding box is defined by its
-center position, rotation maxtrix and extent.
-    - Usage
-        - OrientedBoundingBox::GetCenter()
-        - OrientedBoundingBox::SetCenter(const core::Tensor &center)
-        - OrientedBoundingBox::GetRotation()
-        - OrientedBoundingBox::SetRotation(const core::Tensor &rotation)
-    - Value tensor of center and extent must have shape {3,}.
-    - Value tensor of rotation must have shape {3, 3}.
-    - Value tensor must have the same data type and device.
-    - Value tensor can only be float32 (default) or float64.
-    - The device of the tensor determines the device of the box.
+                R"(A bounding box oriented along an arbitrary frame of reference
+with the properties:
 
-- color: Color of the bounding box.
-    - Usage
-        - OrientedBoundingBox::GetColor()
-        - OrientedBoundingBox::SetColor(const core::Tensor &color)
-    - Value tensor must have shape {3,}.
-    - Value tensor can only be float32 (default) or float64.
-    - Value tensor can only be range [0.0, 1.0].)");
+- (``center``, ``rotation``, ``extent``): The oriented bounding box is defined by its center position (shape (3,)), rotation maxtrix (shape (3,3)) and extent (shape (3,)).  Each of these tensors must have the same data type and device. The data type can only be ``open3d.core.float32`` (default) or ``open3d.core.float64``. The device of the tensor determines the device of the box.
+- ``color``: Color of the bounding box is a tensor with shape (3,) and a data type ``open3d.core.float32`` (default) or ``open3d.core.float64``. Values can only be in the range [0.0, 1.0].)");
 }
 void pybind_boundingvolume_definitions(py::module& m) {
     auto aabb = static_cast<py::class_<AxisAlignedBoundingBox,
diff --git a/docs/make_docs.py b/docs/make_docs.py
index 0803b36bba8..c49f7e31af1 100644
--- a/docs/make_docs.py
+++ b/docs/make_docs.py
@@ -77,9 +77,9 @@ def generate_rst(self):
             try:
                 module = self._try_import_module(module_name)
                 self._generate_module_class_function_docs(module_name, module)
-            except:
-                print("[Warning] Module {} cannot be imported.".format(
-                    module_name))
+            except Exception as _e:
+                print(f"[Warning] Module {module_name} cannot be imported: "
+                      f"{_e}.")
 
     @staticmethod
     def _get_documented_module_names():
diff --git a/docs/requirements.txt b/docs/requirements.txt
index a974fc66a1d..c1836001d10 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,9 +1,10 @@
 docutils==0.20.1
 furo==2023.9.10
-jinja2==3.1.3
+jinja2==3.1.4
 m2r2==0.3.3.post2
 matplotlib==3.7.3
 nbsphinx==0.9.3
 sphinx==7.1.2
 nbconvert==6.5.4
-lxml[html_clean]==5.2.1
+lxml==5.2.1
+lxml_html_clean==0.2.2

From db00e339c1645440dea6951c2971ffa759934112 Mon Sep 17 00:00:00 2001
From: manuelvogel12 <120781514+manuelvogel12@users.noreply.github.com>
Date: Thu, 3 Oct 2024 21:02:19 +0200
Subject: [PATCH 4/4] Faster CPU (Arg-)Reductions (#6989)

* Avoid multithread access to shared array in a loop (CPU Reductions)
* Add two pass CPU ArgReduction Engine to speed up argreductions with single outputs
---
 cpp/open3d/core/kernel/ReductionCPU.cpp | 66 +++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/cpp/open3d/core/kernel/ReductionCPU.cpp b/cpp/open3d/core/kernel/ReductionCPU.cpp
index 7caa60b34f7..710f1aaea24 100644
--- a/cpp/open3d/core/kernel/ReductionCPU.cpp
+++ b/cpp/open3d/core/kernel/ReductionCPU.cpp
@@ -122,13 +122,14 @@ class CPUReductionEngine {
         for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
             int64_t start = thread_idx * workload_per_thread;
             int64_t end = std::min(start + workload_per_thread, num_workloads);
+            scalar_t local_result = identity;
             for (int64_t workload_idx = start; workload_idx < end;
                  ++workload_idx) {
                 scalar_t* src = reinterpret_cast<scalar_t*>(
                         indexer.GetInputPtr(0, workload_idx));
-                thread_results[thread_idx] =
-                        element_kernel(*src, thread_results[thread_idx]);
+                local_result = element_kernel(*src, local_result);
             }
+            thread_results[thread_idx] = local_result;
         }
         scalar_t* dst = reinterpret_cast<scalar_t*>(indexer.GetOutputPtr(0));
         for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
@@ -190,14 +191,25 @@ class CPUArgReductionEngine {
         // elements. We need to keep track of the indices within each
         // sub-iteration.
         int64_t num_output_elements = indexer_.NumOutputElements();
+        if (num_output_elements <= 1) {
+            LaunchArgReductionKernelTwoPass(indexer_, reduce_func, identity);
+        } else {
+            LaunchArgReductionParallelDim(indexer_, reduce_func, identity);
+        }
+    }
 
+    template <typename scalar_t, typename func_t>
+    static void LaunchArgReductionParallelDim(const Indexer& indexer,
+                                              func_t reduce_func,
+                                              scalar_t identity) {
+        int64_t num_output_elements = indexer.NumOutputElements();
 #pragma omp parallel for schedule(static) \
         num_threads(utility::EstimateMaxThreads())
         for (int64_t output_idx = 0; output_idx < num_output_elements;
              output_idx++) {
             // sub_indexer.NumWorkloads() == ipo.
-            // sub_indexer's workload_idx is indexer_'s ipo_idx.
-            Indexer sub_indexer = indexer_.GetPerOutputIndexer(output_idx);
+            // sub_indexer's workload_idx is indexer's ipo_idx.
+            Indexer sub_indexer = indexer.GetPerOutputIndexer(output_idx);
             scalar_t dst_val = identity;
             for (int64_t workload_idx = 0;
                  workload_idx < sub_indexer.NumWorkloads(); workload_idx++) {
@@ -212,6 +224,52 @@ class CPUArgReductionEngine {
         }
     }
 
+    /// Create num_threads workers to compute partial arg reductions
+    /// and then reduce to the final results.
+    /// This only applies to arg reduction op with one output.
+    template <typename scalar_t, typename func_t>
+    static void LaunchArgReductionKernelTwoPass(const Indexer& indexer,
+                                                func_t reduce_func,
+                                                scalar_t identity) {
+        if (indexer.NumOutputElements() > 1) {
+            utility::LogError(
+                    "Internal error: two-pass arg reduction only works for "
+                    "single-output arg reduction ops.");
+        }
+        int64_t num_workloads = indexer.NumWorkloads();
+        int64_t num_threads = utility::EstimateMaxThreads();
+        int64_t workload_per_thread =
+                (num_workloads + num_threads - 1) / num_threads;
+        std::vector<int64_t> thread_results_idx(num_threads, 0);
+        std::vector<scalar_t> thread_results_val(num_threads, identity);
+
+#pragma omp parallel for schedule(static) \
+        num_threads(utility::EstimateMaxThreads())
+        for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
+            int64_t start = thread_idx * workload_per_thread;
+            int64_t end = std::min(start + workload_per_thread, num_workloads);
+            scalar_t local_result_val = identity;
+            int64_t local_result_idx = 0;
+            for (int64_t workload_idx = start; workload_idx < end;
+                 ++workload_idx) {
+                int64_t src_idx = workload_idx;
+                scalar_t* src_val = reinterpret_cast<scalar_t*>(
+                        indexer.GetInputPtr(0, workload_idx));
+                std::tie(local_result_idx, local_result_val) = reduce_func(
+                        src_idx, *src_val, local_result_idx, local_result_val);
+            }
+            thread_results_val[thread_idx] = local_result_val;
+            thread_results_idx[thread_idx] = local_result_idx;
+        }
+        scalar_t dst_val = identity;
+        int64_t* dst_idx = reinterpret_cast<int64_t*>(indexer.GetOutputPtr(0));
+        for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
+            std::tie(*dst_idx, dst_val) = reduce_func(
+                    thread_results_idx[thread_idx],
+                    thread_results_val[thread_idx], *dst_idx, dst_val);
+        }
+    }
+
 private:
     Indexer indexer_;
 };