Skip to content

Commit

Permalink
Update generation and flattening of dendrogram in Leiden (rapidsai#4347)
Browse files Browse the repository at this point in the history
The Leiden dendrogram was being populated with both the Louvain cluster assignments and the mapping between Leiden and Louvain clusters. The dendrogram was flattened by applying the Louvain clustering and then mapping between the Leiden clusters at each level.

Unfortunately, the Leiden to Louvain mapping allows a many-to-one relationship, so in certain cases the flattening was non-deterministic.  There wasn't enough information in the dendrogram to perform the mapping deterministically.

This PR modifies the dendrogram to be similar to Louvain, just keeping the cluster assignments at each level.  The mapping between Louvain and Leiden clusters is done when creating the dendrogram, where there is sufficient information to perform this translation deterministically.

Closes rapidsai#4072

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Naim (https://github.com/naimnv)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Naim (https://github.com/naimnv)
  - Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#4347
  • Loading branch information
ChuckHastings authored Apr 22, 2024
1 parent c31cd63 commit ca88a47
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 65 deletions.
27 changes: 0 additions & 27 deletions cpp/src/community/flatten_dendrogram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,4 @@ void partition_at_level(raft::handle_t const& handle,
});
}

/**
 * @brief Flatten a Leiden dendrogram into one cluster id per level-0 vertex.
 *
 * Copies level 0 of @p dendrogram into @p d_partition and then applies
 * (level - 1) / 2 relabel passes to @p d_partition. Pass `l` feeds
 * cugraph::relabel with the pair of dendrogram levels (2 * l + 1, 2 * l + 2)
 * and the size of level 2 * l + 1.
 * NOTE(review): this presumably relies on the odd/even levels holding an
 * (old label, new label) mapping — confirm against the code that builds the
 * dendrogram.
 *
 * @tparam vertex_t  Vertex / cluster id type.
 * @tparam multi_gpu Forwarded to cugraph::relabel.
 *
 * @param handle      RAFT handle; its stream is used for the initial copy.
 * @param dendrogram  Dendrogram whose levels are read (unchecked accessors).
 * @param d_partition Output buffer; must have room for as many entries as
 *                    level 0 of @p dendrogram holds.
 * @param level       Number of dendrogram levels to take into account.
 *                    Values 0, 1 and 2 result in no relabel pass.
 */
template <typename vertex_t, bool multi_gpu>
void leiden_partition_at_level(raft::handle_t const& handle,
                               Dendrogram<vertex_t> const& dendrogram,
                               vertex_t* d_partition,
                               size_t level)
{
  vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0);

  // Start from the level-0 assignment; every later pass rewrites it in place.
  raft::copy(
    d_partition, dendrogram.get_level_ptr_nocheck(0), local_num_verts, handle.get_stream());

  // `level` is unsigned: guard against 0 so that (level - 1) cannot wrap
  // around to a huge pass count and walk off the end of the dendrogram.
  size_t const num_relabel_passes = (level > 0) ? (level - 1) / 2 : size_t{0};

  for (size_t l = 0; l < num_relabel_passes; ++l) {
    cugraph::relabel<vertex_t, multi_gpu>(
      handle,
      std::tuple<vertex_t const*, vertex_t const*>(dendrogram.get_level_ptr_nocheck(2 * l + 1),
                                                   dendrogram.get_level_ptr_nocheck(2 * l + 2)),
      dendrogram.get_level_size_nocheck(2 * l + 1),
      d_partition,
      local_num_verts,
      false);  // presumably relabel's expensive-check flag — TODO confirm
  }
}

} // namespace cugraph
131 changes: 96 additions & 35 deletions cpp/src/community/leiden_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(

rmm::device_uvector<vertex_t> louvain_of_refined_graph(0, handle.get_stream()); // #V

while (dendrogram->num_levels() < 2 * max_level + 1) {
while (dendrogram->num_levels() < max_level) {
//
// Initialize every cluster to reference each vertex to itself
//
Expand Down Expand Up @@ -249,8 +249,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_start<graph_view_t::is_multi_gpu>(handle, hr_timer, "update_clustering");
#endif

rmm::device_uvector<vertex_t> louvain_assignment_for_vertices =
rmm::device_uvector<vertex_t>(dendrogram->current_level_size(), handle.get_stream());
rmm::device_uvector<vertex_t> louvain_assignment_for_vertices(dendrogram->current_level_size(),
handle.get_stream());

raft::copy(louvain_assignment_for_vertices.begin(),
dendrogram->current_level_begin(),
Expand Down Expand Up @@ -427,6 +427,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
dst_louvain_assignment_cache,
up_down);
}

// Clear buffer and contract the graph

cluster_keys.resize(0, handle.get_stream());
Expand All @@ -452,8 +453,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(

if (nr_unique_leiden < current_graph_view.number_of_vertices()) {
// Create aggregate graph based on refined (leiden) partition
std::optional<rmm::device_uvector<vertex_t>> cluster_assignment{std::nullopt};
std::tie(coarse_graph, coarsen_graph_edge_weight, cluster_assignment) =
std::optional<rmm::device_uvector<vertex_t>> numbering_map{std::nullopt};
std::tie(coarse_graph, coarsen_graph_edge_weight, numbering_map) =
coarsen_graph(handle,
current_graph_view,
current_edge_weight_view,
Expand All @@ -466,34 +467,99 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
std::make_optional<edge_property_view_t<edge_t, weight_t const*>>(
(*coarsen_graph_edge_weight).view());

// cluster_assignment contains leiden cluster ids of aggregated nodes
// After the call to relabel, cluster_assignment will contain the louvain cluster ids
// of the aggregated nodes
// FIXME: reconsider what's put into dendrogram->current_level_begin()
// at what point in the code. I'm just going to overwrite it here,
// so perhaps it should be in different structures until now

// New approach, mimic Louvain, we'll store the Leiden results in the dendrogram
raft::copy(dendrogram->current_level_begin(),
refined_leiden_partition.data(),
refined_leiden_partition.size(),
handle.get_stream());

louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(),
handle.get_stream());
rmm::device_uvector<vertex_t> numeric_sequence(
current_graph_view.local_vertex_partition_range_size(), handle.get_stream());

detail::sequence_fill(handle.get_stream(),
numeric_sequence.data(),
numeric_sequence.size(),
current_graph_view.local_vertex_partition_range_first());

relabel<vertex_t, multi_gpu>(
handle,
std::make_tuple(static_cast<vertex_t const*>((*numbering_map).begin()),
static_cast<vertex_t const*>(numeric_sequence.begin())),
(*numbering_map).size(),
dendrogram->current_level_begin(),
dendrogram->current_level_size(),
false);

raft::copy(louvain_of_refined_graph.begin(),
numbering_map->data(),
numbering_map->size(),
handle.get_stream());

relabel<vertex_t, multi_gpu>(
handle,
std::make_tuple(static_cast<vertex_t const*>(leiden_to_louvain_map.first.begin()),
static_cast<vertex_t const*>(leiden_to_louvain_map.second.begin())),
leiden_to_louvain_map.first.size(),
(*cluster_assignment).data(),
(*cluster_assignment).size(),
louvain_of_refined_graph.data(),
louvain_of_refined_graph.size(),
false);
// louvain assignment of aggregated graph which is necessary to flatten dendrogram
dendrogram->add_level(current_graph_view.local_vertex_partition_range_first(),
current_graph_view.local_vertex_partition_range_size(),
handle.get_stream());

raft::copy(dendrogram->current_level_begin(),
(*cluster_assignment).begin(),
(*cluster_assignment).size(),
// Relabel clusters so that each cluster is identified by the lowest vertex id
// that is assigned to it. Note that numbering_map and numeric_sequence go out
// of scope at the end of this block, we will reuse their memory
raft::copy(numbering_map->begin(),
louvain_of_refined_graph.data(),
louvain_of_refined_graph.size(),
handle.get_stream());

louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(),
handle.get_stream());
thrust::sort(handle.get_thrust_policy(),
thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()),
thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end()));

raft::copy(louvain_of_refined_graph.begin(),
(*cluster_assignment).begin(),
(*cluster_assignment).size(),
handle.get_stream());
size_t new_size = thrust::distance(numbering_map->begin(),
thrust::unique_by_key(handle.get_thrust_policy(),
numbering_map->begin(),
numbering_map->end(),
numeric_sequence.begin())
.first);

numbering_map->resize(new_size, handle.get_stream());
numeric_sequence.resize(new_size, handle.get_stream());

if constexpr (multi_gpu) {
std::tie(*numbering_map, numeric_sequence) =
shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning(
handle, std::move(*numbering_map), std::move(numeric_sequence));

thrust::sort(handle.get_thrust_policy(),
thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()),
thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end()));

size_t new_size = thrust::distance(numbering_map->begin(),
thrust::unique_by_key(handle.get_thrust_policy(),
numbering_map->begin(),
numbering_map->end(),
numeric_sequence.begin())
.first);

numbering_map->resize(new_size, handle.get_stream());
numeric_sequence.resize(new_size, handle.get_stream());
}

relabel<vertex_t, multi_gpu>(
handle,
std::make_tuple(static_cast<vertex_t const*>((*numbering_map).begin()),
static_cast<vertex_t const*>(numeric_sequence.begin())),
(*numbering_map).size(),
louvain_of_refined_graph.data(),
louvain_of_refined_graph.size(),
false);
}
}

Expand Down Expand Up @@ -565,20 +631,15 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle,
Dendrogram<vertex_t> const& dendrogram,
vertex_t* clustering)
{
leiden_partition_at_level<vertex_t, multi_gpu>(
handle, dendrogram, clustering, dendrogram.num_levels());
rmm::device_uvector<vertex_t> vertex_ids_v(graph_view.number_of_vertices(), handle.get_stream());

rmm::device_uvector<vertex_t> unique_cluster_ids(graph_view.local_vertex_partition_range_size(),
handle.get_stream());
thrust::copy(handle.get_thrust_policy(),
clustering,
clustering + graph_view.local_vertex_partition_range_size(),
unique_cluster_ids.begin());

remove_duplicates<vertex_t, multi_gpu>(handle, unique_cluster_ids);
detail::sequence_fill(handle.get_stream(),
vertex_ids_v.begin(),
vertex_ids_v.size(),
graph_view.local_vertex_partition_range_first());

relabel_cluster_ids<vertex_t, multi_gpu>(
handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size());
partition_at_level<vertex_t, multi_gpu>(
handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels());
}

} // namespace detail
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/community/test_leiden.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -83,8 +83,8 @@
"input_type": "CSR",
"expected_output": {
# fmt: off
"partition": [3, 3, 3, 3, 2, 2, 2, 3, 1, 3, 2, 3, 3, 3, 1, 1, 2, 3, 1, 3,
1, 3, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1],
"partition": [0, 0, 0, 0, 3, 3, 3, 0, 1, 0, 3, 0, 0, 0, 1, 1, 3, 0, 1, 0,
1, 0, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1],
# fmt: on
"modularity_score": 0.41880345,
},
Expand Down

0 comments on commit ca88a47

Please sign in to comment.