Modularity的定义和公式推导可以参考Modularity的计算方法——社团检测中模块度计算公式详解 | 雅乐网
使用公式
$$Q = \sum_i{(\frac{e_i}{m} – (\frac{k_{C_i}}{2m})^2)} $$
其中, \(e_i\)表示社团 i 内部的边数,\(k_{C_i}\) 表示社团i内部所有点的度数之和,m表示图中边的个数。
不妨先看一下手动的计算过程:
$$Q = \sum_i{(\frac{e_i}{m} – (\frac{k_{C_i}}{2m})^2)} $$
这个式子的i是要遍历所有的社团,例子中有3个社团,所以,Q就是3个部分的和,每个部分是
$$\frac{e_i}{m} – (\frac{k_{C_i}}{2m})^2$$
上图有23条边,m=23
先来计算社团1,这个社团 内部有5条边 ,所以 \(e_1 = 5\) ,内部结点4个,度数之和是 \(k_{C_1} = 11\),这样求和的第一项就是
$$\frac{e_1}{m} – (\frac{k_{C_1}}{2m})^2 = \frac{5}{23} – (\frac{11}{2*23})^2 = \frac{339}{2116}$$
社团2和3也是这样计算:
$$\frac{e_2}{m} – (\frac{k_{C_2}}{2m})^2 = \frac{7}{23} – (\frac{17}{2*23})^2 = \frac{355}{2116}$$
$$\frac{e_3}{m} – (\frac{k_{C_3}}{2m})^2 = \frac{8}{23} – (\frac{18}{2*23})^2 = \frac{412}{2116}$$
最终的Q值就是这三个部分加起来:
$$Q =\frac{339}{2116} +\frac{355}{2116} + \frac{412}{2116} = \frac{1106}{2116}=0.52268431$$
在代码中,我们的图是用边表示的,为了得到社团内部的边,可以遍历所有的边一次,每次看这条边的两个结点是否位于同一个社团内,是的话该社团内部边数+1
为了得到社团内部结点的度数之和,可以先得到图中所有结点的度,然后按照社团内部的点累加即可。
C++代码
下面的示例代码中,图是按照边表的形式存在一个 vector<pair<int,int>> graph 中,社团文件存在 vector<vector<int>> communities; 中
#include <iostream>
#include <vector>
#include <utility>
#include <algorithm>
using namespace std;
#include <cstdio>
#include <cstdlib>
#include <cstring>
vector<pair<int,int> > graph;
vector<vector<int>> communities;
//最大节点编号
int get_max_node_id()
{
int res = 0;
for (size_t i = 0; i < graph.size(); ++i)
{
res = max(res, graph[i].first);
res = max(res, graph[i].second);
}
return res;
}
//返回每个结点的度
vector<int> getDegree()
{
vector<int> d(get_max_node_id() + 1, 0);
for (size_t i = 0; i < graph.size(); ++i)
{
++d[graph[i].first];
++d[graph[i].second];
}
return d;
}
//返回一个记录每个结点所属社团号的向量
vector<int> getcommTableOfNodes()
{
vector<int> res;
res.resize(get_max_node_id()+1);
for (size_t i = 0; i < communities.size(); ++i)
{
const vector<int> & nodes = communities[i];
for (size_t j = 0; j < nodes.size(); ++j)
{
res[nodes[j]] = i;
}
}
return res;
}
//社团内部的边数
vector<int> getCommInterEdgeNum()
{
vector<int> v(communities.size(), 0);
for (size_t i = 0; i < graph.size(); ++i)
{
int x = graph[i].first;
int y = graph[i].second;
vector<int> cid = getcommTableOfNodes();
if (cid[x] == cid[y])
{
++v[cid[x]];
}
}
return v;
}
//社团内部结点的度数之和
vector<int> getCommInterNodesDegree()
{
vector<int> degree = getDegree();
vector<int> v(communities.size(), 0);
for (size_t i = 0; i < communities.size(); ++i)
{
for (size_t j = 0; j < communities[i].size(); ++j)
v[i] += degree[communities[i][j]];
}
return v;
}
double calcModularity()
{
//社团个数 ==3
int nc = communities.size();
//社团内部的边数 ==[5, 7, 8]
vector<int> comm_inter_edge_num = getCommInterEdgeNum();
//社团内部点的度数之和 ==[11, 17, 18]
vector<int> comm_inter_nodes_degree = getCommInterNodesDegree();
//总边数 ==23
double m = graph.size();
double Q = 0;
for (int i = 0; i < nc; ++i)
{
Q += (comm_inter_edge_num[i] / m) - (comm_inter_nodes_degree[i] / (2 * m)) * (comm_inter_nodes_degree[i] / (2 * m));
}
return Q;
}
int main()
{
graph.push_back(make_pair(0, 1));
graph.push_back(make_pair(0, 2));
graph.push_back(make_pair(0, 3));
graph.push_back(make_pair(1, 2));
graph.push_back(make_pair(1, 3));
graph.push_back(make_pair(2, 4));
graph.push_back(make_pair(4, 5));
graph.push_back(make_pair(4, 7));
graph.push_back(make_pair(5, 6));
graph.push_back(make_pair(5, 7));
graph.push_back(make_pair(5, 8));
graph.push_back(make_pair(6, 8));
graph.push_back(make_pair(7, 8));
graph.push_back(make_pair(7, 10));
graph.push_back(make_pair(8, 9));
graph.push_back(make_pair(9, 10));
graph.push_back(make_pair(9, 12));
graph.push_back(make_pair(9, 13));
graph.push_back(make_pair(10, 11));
graph.push_back(make_pair(10, 12));
graph.push_back(make_pair(11, 12));
graph.push_back(make_pair(11, 13));
graph.push_back(make_pair(12, 13));
vector<int> comm1 = {0,1,2,3};
vector<int> comm2 = {4, 5, 6, 7, 8};
vector<int> comm3 = {9, 10, 11, 12, 13};
communities.push_back(comm1);
communities.push_back(comm2);
communities.push_back(comm3);
cout << calcModularity() << endl;
return 0;
}

支付宝打赏
微信打赏
文章《Modularity计算的C++代码》共有1条评论: