diff --git a/cpp/src/community/flatten_dendrogram.hpp b/cpp/src/community/flatten_dendrogram.hpp index 83aaf389612..4bf6d3ed240 100644 --- a/cpp/src/community/flatten_dendrogram.hpp +++ b/cpp/src/community/flatten_dendrogram.hpp @@ -60,31 +60,4 @@ void partition_at_level(raft::handle_t const& handle, }); } -template -void leiden_partition_at_level(raft::handle_t const& handle, - Dendrogram const& dendrogram, - vertex_t* d_partition, - size_t level) -{ - vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); - raft::copy( - d_partition, dendrogram.get_level_ptr_nocheck(0), local_num_verts, handle.get_stream()); - - rmm::device_uvector local_vertex_ids_v(local_num_verts, handle.get_stream()); - - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator((level - 1) / 2), - [&handle, &dendrogram, &local_vertex_ids_v, &d_partition, local_num_verts](size_t l) { - cugraph::relabel( - handle, - std::tuple(dendrogram.get_level_ptr_nocheck(2 * l + 1), - dendrogram.get_level_ptr_nocheck(2 * l + 2)), - dendrogram.get_level_size_nocheck(2 * l + 1), - d_partition, - local_num_verts, - false); - }); -} - } // namespace cugraph diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 499724583a9..c07f9f6ffba 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -108,7 +108,7 @@ std::pair>, weight_t> leiden( rmm::device_uvector louvain_of_refined_graph(0, handle.get_stream()); // #V - while (dendrogram->num_levels() < 2 * max_level + 1) { + while (dendrogram->num_levels() < max_level) { // // Initialize every cluster to reference each vertex to itself // @@ -249,8 +249,8 @@ std::pair>, weight_t> leiden( detail::timer_start(handle, hr_timer, "update_clustering"); #endif - rmm::device_uvector louvain_assignment_for_vertices = - rmm::device_uvector(dendrogram->current_level_size(), handle.get_stream()); + rmm::device_uvector louvain_assignment_for_vertices(dendrogram->current_level_size(), + handle.get_stream()); raft::copy(louvain_assignment_for_vertices.begin(), dendrogram->current_level_begin(), @@ -427,6 +427,7 @@ std::pair>, weight_t> leiden( dst_louvain_assignment_cache, up_down); } + // Clear buffer and contract the graph cluster_keys.resize(0, handle.get_stream()); @@ -452,8 +453,8 @@ std::pair>, weight_t> leiden( if (nr_unique_leiden < current_graph_view.number_of_vertices()) { // Create aggregate graph based on refined (leiden) partition - std::optional> cluster_assignment{std::nullopt}; - std::tie(coarse_graph, coarsen_graph_edge_weight, cluster_assignment) = + std::optional> numbering_map{std::nullopt}; + std::tie(coarse_graph, coarsen_graph_edge_weight, numbering_map) = coarsen_graph(handle, current_graph_view, current_edge_weight_view, @@ -466,34 +467,99 @@ std::pair>, weight_t> leiden( std::make_optional>( (*coarsen_graph_edge_weight).view()); - // cluster_assignment contains leiden cluster ids of aggregated nodes - // After call to relabel, cluster_assignment will louvain cluster ids - // of the aggregated nodes + // FIXME: reconsider what's put into dendrogram->current_level_begin() + // at what point in the code. I'm just going to overwrite it here, + // so perhaps it should be in different structures until now + + // New approach, mimic Louvain, we'll store the Leiden results in the dendrogram + raft::copy(dendrogram->current_level_begin(), + refined_leiden_partition.data(), + refined_leiden_partition.size(), + handle.get_stream()); + + louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + rmm::device_uvector numeric_sequence( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + + detail::sequence_fill(handle.get_stream(), + numeric_sequence.data(), + numeric_sequence.size(), + current_graph_view.local_vertex_partition_range_first()); + + relabel( + handle, + std::make_tuple(static_cast((*numbering_map).begin()), + static_cast(numeric_sequence.begin())), + (*numbering_map).size(), + dendrogram->current_level_begin(), + dendrogram->current_level_size(), + false); + + raft::copy(louvain_of_refined_graph.begin(), + numbering_map->data(), + numbering_map->size(), + handle.get_stream()); + relabel( handle, std::make_tuple(static_cast(leiden_to_louvain_map.first.begin()), static_cast(leiden_to_louvain_map.second.begin())), leiden_to_louvain_map.first.size(), - (*cluster_assignment).data(), - (*cluster_assignment).size(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), false); - // louvain assignment of aggregated graph which is necessary to flatten dendrogram - dendrogram->add_level(current_graph_view.local_vertex_partition_range_first(), - current_graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - raft::copy(dendrogram->current_level_begin(), - (*cluster_assignment).begin(), - (*cluster_assignment).size(), + // Relabel clusters so that each cluster is identified by the lowest vertex id + // that is assigned to it. Note that numbering_map and numeric_sequence go out + // of scope at the end of this block, we will reuse their memory + raft::copy(numbering_map->begin(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), handle.get_stream()); - louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), - handle.get_stream()); + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()), + thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end())); - raft::copy(louvain_of_refined_graph.begin(), - (*cluster_assignment).begin(), - (*cluster_assignment).size(), - handle.get_stream()); + size_t new_size = thrust::distance(numbering_map->begin(), + thrust::unique_by_key(handle.get_thrust_policy(), + numbering_map->begin(), + numbering_map->end(), + numeric_sequence.begin()) + .first); + + numbering_map->resize(new_size, handle.get_stream()); + numeric_sequence.resize(new_size, handle.get_stream()); + + if constexpr (multi_gpu) { + std::tie(*numbering_map, numeric_sequence) = + shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, std::move(*numbering_map), std::move(numeric_sequence)); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()), + thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end())); + + size_t new_size = thrust::distance(numbering_map->begin(), + thrust::unique_by_key(handle.get_thrust_policy(), + numbering_map->begin(), + numbering_map->end(), + numeric_sequence.begin()) + .first); + + numbering_map->resize(new_size, handle.get_stream()); + numeric_sequence.resize(new_size, handle.get_stream()); + } + + relabel( + handle, + std::make_tuple(static_cast((*numbering_map).begin()), + static_cast(numeric_sequence.begin())), + (*numbering_map).size(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + false); } } @@ -565,20 +631,15 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, Dendrogram const& dendrogram, vertex_t* clustering) { - leiden_partition_at_level( - handle, dendrogram, clustering, dendrogram.num_levels()); + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices(), handle.get_stream()); - rmm::device_uvector unique_cluster_ids(graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - clustering, - clustering + graph_view.local_vertex_partition_range_size(), - unique_cluster_ids.begin()); - - remove_duplicates(handle, unique_cluster_ids); + detail::sequence_fill(handle.get_stream(), + vertex_ids_v.begin(), + vertex_ids_v.size(), + graph_view.local_vertex_partition_range_first()); - relabel_cluster_ids( - handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size()); + partition_at_level( + handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); } } // namespace detail diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py index 71117c4210f..48300b2201c 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden.py +++ b/python/cugraph/cugraph/tests/community/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -83,8 +83,8 @@ "input_type": "CSR", "expected_output": { # fmt: off - "partition": [3, 3, 3, 3, 2, 2, 2, 3, 1, 3, 2, 3, 3, 3, 1, 1, 2, 3, 1, 3, - 1, 3, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1], + "partition": [0, 0, 0, 0, 3, 3, 3, 0, 1, 0, 3, 0, 0, 0, 1, 1, 3, 0, 1, 0, + 1, 0, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1], # fmt: on "modularity_score": 0.41880345, },