Modularity的定义和公式推导可以参考Modularity的计算方法——社团检测中模块度计算公式详解 | 雅乐网
使用公式
$$Q = \sum_i{(\frac{e_i}{m} – (\frac{k_{C_i}}{2m})^2)} $$
其中, \(e_i\)表示社团 i 内部的边数,\(k_{C_i}\) 表示社团i内部所有点的度数之和,m表示图中边的个数。
不妨先看一下手动的计算过程:
$$Q = \sum_i{(\frac{e_i}{m} – (\frac{k_{C_i}}{2m})^2)} $$
这个式子的i是要遍历所有的社团,例子中有3个社团,所以,Q就是3个部分的和,每个部分是
$$\frac{e_i}{m} – (\frac{k_{C_i}}{2m})^2$$
上图有23条边,m=23
先来计算社团1,这个社团 内部有5条边 ,所以 \(e_1 = 5\) ,内部结点4个,度数之和是 \(k_{C_1} = 11\),这样求和的第一项就是
$$\frac{e_1}{m} – (\frac{k_{C_1}}{2m})^2 = \frac{5}{23} – (\frac{11}{2*23})^2 = \frac{339}{2116}$$
社团2和3也是这样计算:
$$\frac{e_2}{m} – (\frac{k_{C_2}}{2m})^2 = \frac{7}{23} – (\frac{17}{2*23})^2 = \frac{355}{2116}$$
$$\frac{e_3}{m} – (\frac{k_{C_3}}{2m})^2 = \frac{8}{23} – (\frac{18}{2*23})^2 = \frac{412}{2116}$$
最终的Q值就是这三个部分加起来:
$$Q =\frac{339}{2116} +\frac{355}{2116} + \frac{412}{2116} = \frac{1106}{2116}=0.52268431$$
在代码中,我们的图是用边表示的,为了得到社团内部的边,可以遍历所有的边一次,每次看这条边的两个结点是否位于同一个社团内,是的话该社团内部边数+1
为了得到社团内部结点的度数之和,可以先得到图中所有结点的度,然后按照社团内部的点累加即可。
C++代码
下面的示例代码中,图是按照边表的形式存在一个 vector<pair<int,int>> graph 中,社团文件存在 vector<vector<int>> communities; 中
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
#include <iostream> #include <vector> #include <utility> #include <algorithm> using namespace std; #include <cstdio> #include <cstdlib> #include <cstring> vector<pair<int,int> > graph; vector<vector<int>> communities; //最大节点编号 int get_max_node_id() { int res = 0; for (size_t i = 0; i < graph.size(); ++i) { res = max(res, graph[i].first); res = max(res, graph[i].second); } return res; } //返回每个结点的度 vector<int> getDegree() { vector<int> d(get_max_node_id() + 1, 0); for (size_t i = 0; i < graph.size(); ++i) { ++d[graph[i].first]; ++d[graph[i].second]; } return d; } //返回一个记录每个结点所属社团号的向量 vector<int> getcommTableOfNodes() { vector<int> res; res.resize(get_max_node_id()+1); for (size_t i = 0; i < communities.size(); ++i) { const vector<int> & nodes = communities[i]; for (size_t j = 0; j < nodes.size(); ++j) { res[nodes[j]] = i; } } return res; } //社团内部的边数 vector<int> getCommInterEdgeNum() { vector<int> v(communities.size(), 0); for (size_t i = 0; i < graph.size(); ++i) { int x = graph[i].first; int y = graph[i].second; vector<int> cid = getcommTableOfNodes(); if (cid[x] == cid[y]) { ++v[cid[x]]; } } return v; } //社团内部结点的度数之和 vector<int> getCommInterNodesDegree() { vector<int> degree = getDegree(); vector<int> v(communities.size(), 0); for (size_t i = 0; i < communities.size(); ++i) { for (size_t j = 0; j < communities[i].size(); ++j) v[i] += degree[communities[i][j]]; } return v; } double calcModularity() { //社团个数 ==3 int nc = communities.size(); //社团内部的边数 ==[5, 7, 8] vector<int> comm_inter_edge_num = getCommInterEdgeNum(); //社团内部点的度数之和 ==[11, 17, 18] vector<int> comm_inter_nodes_degree = getCommInterNodesDegree(); //总边数 ==23 double m = graph.size(); double Q = 0; for (int i = 0; i < nc; ++i) { Q += (comm_inter_edge_num[i] / m) - (comm_inter_nodes_degree[i] / (2 * m)) * (comm_inter_nodes_degree[i] / (2 * m)); } return Q; } int main() { graph.push_back(make_pair(0, 1)); graph.push_back(make_pair(0, 2)); graph.push_back(make_pair(0, 3)); graph.push_back(make_pair(1, 2)); graph.push_back(make_pair(1, 3)); graph.push_back(make_pair(2, 4)); graph.push_back(make_pair(4, 5)); graph.push_back(make_pair(4, 7)); graph.push_back(make_pair(5, 6)); graph.push_back(make_pair(5, 7)); graph.push_back(make_pair(5, 8)); graph.push_back(make_pair(6, 8)); graph.push_back(make_pair(7, 8)); graph.push_back(make_pair(7, 10)); graph.push_back(make_pair(8, 9)); graph.push_back(make_pair(9, 10)); graph.push_back(make_pair(9, 12)); graph.push_back(make_pair(9, 13)); graph.push_back(make_pair(10, 11)); graph.push_back(make_pair(10, 12)); graph.push_back(make_pair(11, 12)); graph.push_back(make_pair(11, 13)); graph.push_back(make_pair(12, 13)); vector<int> comm1 = {0,1,2,3}; vector<int> comm2 = {4, 5, 6, 7, 8}; vector<int> comm3 = {9, 10, 11, 12, 13}; communities.push_back(comm1); communities.push_back(comm2); communities.push_back(comm3); cout << calcModularity() << endl; return 0; } |