Source code for torch_geometric_signed_directed.data.signed.polarized_SSBM

from typing import Tuple
import random
import math

import numpy as np
import networkx as nx
import scipy.sparse as sp
import numpy.random as rnd

from .SSBM import SSBM


def polarized_SSBM(total_n: int = 100, num_com: int = 3, N: int = 30, K: int = 2,
                   p: float = 0.1, eta: float = 0.1,
                   size_ratio: float = 1) -> Tuple[Tuple[sp.spmatrix, sp.spmatrix], np.array, np.array]:
    """A polarized signed stochastic block model graph generator from the
    `SSSNET: Semi-Supervised Signed Network Clustering <https://arxiv.org/pdf/2110.06623.pdf>`_ paper.

    Arg types:
        * **total_n** (int) - Total number of nodes in the polarized network.
        * **num_com** (int) - Number of conflicting communities.
        * **N** (int) - Default size of an SSBM community.
        * **K** (int) - Number of blocks (clusters) within a conflicting community.
        * **p** (float) - Probability that an edge exists.
        * **eta** (float) - Sign flip probability, 0 <= eta <= 0.5.
        * **size_ratio** (float) - Community sizes are multiples of each other, with the largest being size_ratio times the size of the smallest.

    Return types:
        * **A_p_new, A_n_new** (sp.spmatrix) - Positive and negative parts of the polarized network.
        * **labels_new** (np.array) - Ordered labels of the nodes, with conflicting communities labeled together; cluster 0 is the ambient cluster.
        * **conflict_groups** (np.array) - An array indicating which conflicting group each node is in; 0 is ambient.
    """
    # number of links in large_A_p and large_A_n respectively;
    # note that we need to add each link twice for the undirected graph
    select_num = math.floor(total_n*p/4*total_n)
    tuples_full = []
    for x in range(total_n):
        for y in range(total_n):
            tuples_full.append((x, y))
    full_idx = random.sample(tuples_full, select_num*2)
    full_idx = list(set([(x[1], x[0]) for x in full_idx]) -
                    set([(x[0], x[1]) for x in full_idx]))
    select_num = math.floor(len(full_idx)/2)
    p_row_idx = []
    p_col_idx = []
    p_dat = []
    for p_idx in full_idx[:select_num]:
        p_row_idx.append(p_idx[0])
        p_col_idx.append(p_idx[1])
        p_dat.append(1)
        p_row_idx.append(p_idx[1])
        p_col_idx.append(p_idx[0])
        p_dat.append(1)
    n_row_idx = []
    n_col_idx = []
    n_dat = []
    for n_idx in full_idx[select_num:2*select_num]:
        n_row_idx.append(n_idx[0])
        n_col_idx.append(n_idx[1])
        n_dat.append(1)
        n_row_idx.append(n_idx[1])
        n_col_idx.append(n_idx[0])
        n_dat.append(1)
    large_A_p = sp.coo_matrix(
        (p_dat, (p_row_idx, p_col_idx)), shape=(total_n, total_n)).tolil()
    large_A_n = sp.coo_matrix(
        (n_dat, (n_row_idx, n_col_idx)), shape=(total_n, total_n)).tolil()
    large_labels = np.zeros(total_n)
    conflict_groups = np.zeros(total_n)

    total_n_com = num_com * N  # the total number of nodes in communities
    size = [0] * num_com
    if size_ratio > 1:
        ratio_each = np.power(size_ratio, 1/(num_com - 1))
        smallest_size = math.floor(
            total_n_com*(1-ratio_each)/(1-np.power(ratio_each, num_com)))
        size[0] = smallest_size
        if num_com > 2:
            for i in range(1, num_com - 1):
                size[i] = math.floor(size[i-1] * ratio_each)
        size[num_com-1] = total_n_com - np.sum(size)
    else:  # degenerate case, equivalent to 'uniform' sizes
        size = [math.floor((i + 1) * total_n_com / num_com) -
                math.floor(i * total_n_com / num_com) for i in range(num_com)]

    counter = 0  # a counter of how many nodes have already been counted
    for com in range(num_com):
        com_size = size[com]  # the size of this conflicting group, an SSBM
        (A_p, A_n), labels = SSBM(n=com_size, k=K, pin=p,
                                  etain=eta, size_ratio=size_ratio)
        large_A_p[counter:counter+com_size, counter:counter+com_size] = A_p
        large_A_n[counter:counter+com_size, counter:counter+com_size] = A_n
        large_labels[counter:counter+com_size] = labels + (2*com + 1)
        conflict_groups[counter:counter+com_size] = com + 1  # start from 1
        counter += com_size

    # do permutation
    # perm[i] is the new index for node i (i is the old index)
    # label of perm[i] should therefore be the current label of node i, similar for conflict group number
    np.random.seed(2020)
    perm = rnd.permutation(total_n)
    p_row_idx, p_col_idx = large_A_p.nonzero()
    large_A_p_values = sp.csc_matrix(large_A_p).data
    p_row_idx = perm[p_row_idx]
    p_col_idx = perm[p_col_idx]
    large_A_p = sp.coo_matrix(
        (large_A_p_values, (p_row_idx, p_col_idx)), shape=(total_n, total_n)).tocsc()
    n_row_idx, n_col_idx = large_A_n.nonzero()
    large_A_n_values = sp.csc_matrix(large_A_n).data
    n_row_idx = perm[n_row_idx]
    n_col_idx = perm[n_col_idx]
    large_A_n = sp.coo_matrix(
        (large_A_n_values, (n_row_idx, n_col_idx)), shape=(total_n, total_n)).tocsc()
    large_labels_old = large_labels.copy()
    conflict_groups_old = conflict_groups.copy()
    for i in range(total_n):
        large_labels[perm[i]] = large_labels_old[i]
        conflict_groups[perm[i]] = conflict_groups_old[i]

    # now fix the network connectedness and degree
    # first we fix connectedness
    G = nx.from_scipy_sparse_matrix(large_A_p-large_A_n)
    largest_cc = max(nx.connected_components(G), key=len)
    A_p_new = sp.lil_matrix(large_A_p[list(largest_cc)][:, list(largest_cc)])
    A_n_new = sp.lil_matrix(large_A_n[list(largest_cc)][:, list(largest_cc)])
    labels_new = large_labels[list(largest_cc)]
    conflict_groups = conflict_groups[list(largest_cc)]
    A_bar = sp.lil_matrix(A_p_new+A_n_new)
    # sum over columns to get row sums
    A_bar_row_sum = np.array(sp.lil_matrix.sum(A_bar, axis=1))
    if np.sum(A_bar_row_sum <= 2):  # only do this fix if low-degree nodes exist
        for i in np.arange(len(labels_new))[(A_bar_row_sum <= 2).flatten()]:
            row_to_fix = A_bar[i].toarray().flatten()
            # only do this fix if it is (still) a degree one node, as we may fix the nodes on the way
            if sum(row_to_fix != 0) == 1:
                # add two more edges, only add to locations currently without edges
                node_idx = (np.arange(len(labels_new))[row_to_fix == 0])[
                    random.sample(range(len(labels_new)-sum(row_to_fix != 0)), 2)]
                flip_flag = np.random.binomial(
                    size=2, n=1, p=eta)  # whether to do sign flip
                for j, flip in zip(node_idx, flip_flag):
                    # fix A_bar and then the adjacency matrices
                    A_bar[i, j] = 1  # += 1
                    A_bar[j, i] = 1  # += 1
                    # only apply to conflicting groups
                    if conflict_groups[i] == conflict_groups[j]:
                        if labels_new[j] == labels_new[i]:
                            if flip:
                                A_n_new[i, j] = 1  # += 1
                                A_n_new[j, i] = 1  # += 1
                            else:
                                A_p_new[i, j] = 1  # += 1
                                A_p_new[j, i] = 1  # += 1
                        else:
                            if not flip:
                                A_n_new[i, j] = 1  # += 1
                                A_n_new[j, i] = 1  # += 1
                            else:
                                A_p_new[i, j] = 1  # += 1
                                A_p_new[j, i] = 1  # += 1
                    else:
                        negative = np.random.binomial(size=1, n=1, p=0.5)[0]
                        if negative:
                            A_n_new[i, j] = 1  # += 1
                            A_n_new[j, i] = 1  # += 1
                        else:
                            A_p_new[i, j] = 1  # += 1
                            A_p_new[j, i] = 1  # += 1
            # only do this fix if it is (still) a degree two node, as we may fix the nodes on the way
            if sum(row_to_fix != 0) == 2:
                # add one more edge, only add to locations currently without edges
                node_idx = (np.arange(len(labels_new))[row_to_fix == 0])[
                    np.random.randint(len(labels_new)-sum(row_to_fix != 0), size=1)]
                flip_flag = np.random.binomial(
                    size=1, n=1, p=eta)  # whether to do sign flip
                for j, flip in zip(node_idx, flip_flag):
                    # fix A_bar and then the adjacency matrices
                    A_bar[i, j] = 1  # += 1
                    A_bar[j, i] = 1  # += 1
                    # only apply to conflicting groups
                    if conflict_groups[i] == conflict_groups[j]:
                        if labels_new[j] == labels_new[i]:
                            if flip:
                                A_n_new[i, j] = 1  # += 1
                                A_n_new[j, i] = 1  # += 1
                            else:
                                A_p_new[i, j] = 1  # += 1
                                A_p_new[j, i] = 1  # += 1
                        else:
                            if not flip:
                                A_n_new[i, j] = 1  # += 1
                                A_n_new[j, i] = 1  # += 1
                            else:
                                A_p_new[i, j] = 1  # += 1
                                A_p_new[j, i] = 1  # += 1
                    else:
                        negative = np.random.binomial(size=1, n=1, p=0.5)[0]
                        if negative:
                            A_n_new[i, j] = 1  # += 1
                            A_n_new[j, i] = 1  # += 1
                        else:
                            A_p_new[i, j] = 1  # += 1
                            A_p_new[j, i] = 1  # += 1
    return (A_p_new, A_n_new), labels_new, conflict_groups
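

# A minimal usage sketch (illustrative only, not part of the library source).
# It calls polarized_SSBM as documented above; the argument values and the
# variable names A_p, A_n, labels, groups are arbitrary choices for the demo.
if __name__ == "__main__":
    (A_p, A_n), labels, groups = polarized_SSBM(
        total_n=200, num_com=2, N=50, K=2, p=0.1, eta=0.1, size_ratio=1.5)
    # Both parts cover the nodes kept in the largest connected component,
    # so their shapes match the length of the returned label arrays.
    assert A_p.shape == A_n.shape == (len(labels), len(labels))
    print("kept nodes:", len(labels))
    print("cluster labels:", np.unique(labels))    # 0 is the ambient cluster
    print("conflict groups:", np.unique(groups))   # 0 is ambient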