#Using Jaccard similarity index in projection of two 1-mode networks (users and posts) from the 2-mode (or 'bimodal') Facebook network
#ACSPRI Summer Program 2017
#Robert Ackland
#7 February 2017
library(igraph)
library(Matrix)
#In "SocialMediaLab Tutorial -- Part 2 (Facebook)" Rmd file, we collected Facebook data
#from the Coal Seam Gas page
#The igraph graph object (bimodal network) was saved as a graphml file
jaccard <- function(m) {
  # Pairwise Jaccard similarity between the rows of an incidence matrix.
  #
  # m: numeric matrix (base or Matrix class); rows are the items to compare,
  #    columns are the features they may share. Any non-zero entry is treated
  #    as "present".
  #
  # Returns a sparse nrow(m) x nrow(m) Matrix whose [i, j] entry is
  #    #common / (#i + #j - #common)
  # for every pair of rows sharing at least one column. Pairs with nothing in
  # common (including all-zero rows) are left as structural zeros.
  #
  # source: http://stats.stackexchange.com/questions/49453/calculating-jaccard-or-other-association-coefficient-for-binary-data-using-matri

  # The formula is only valid for 0/1 data: with counts > 1 the cross-product
  # is no longer an intersection size and the denominator can reach zero or
  # go negative. Binarise first; `* 1` converts logical back to numeric.
  m <- (m != 0) * 1

  # number of columns shared by every pair of rows
  A <- tcrossprod(m)
  # (row, col) positions of the pairs that share something
  idx <- which(A > 0, arr.ind = TRUE)
  # set size of each row
  b <- rowSums(m)
  # shared counts for those pairs only
  shared <- A[idx]

  # Jaccard formula: #common / (#i + #j - #common)
  sparseMatrix(
    i = idx[, 1],
    j = idx[, 2],
    x = shared / (b[idx[, 1]] + b[idx[, 2]] - shared),
    dims = dim(A)
  )
}
one_mode <- function(g, j_thresh) {
  # Project a 2-mode (bipartite) graph onto one mode, linking two nodes when
  # their Jaccard similarity is strictly greater than j_thresh.
  #
  # g:        igraph graph with a logical vertex attribute `type`. The
  #           projection is over the rows of the incidence matrix, i.e. the
  #           vertices whose type is FALSE.
  # j_thresh: single numeric threshold; 0 keeps every pair that shares at
  #           least one neighbour (same ties as igraph's bipartite projection).
  #
  # Returns an undirected simple igraph graph with one vertex per row of the
  # incidence matrix (isolates included). Vertex i is row i; when the
  # incidence matrix has row names they are stored in the `name` attribute.
  stopifnot(is.numeric(j_thresh), length(j_thresh) == 1L, !is.na(j_thresh))

  # create the 1-mode network (requires the type attribute, see above)
  im <- as_incidence_matrix(g)

  ## Alternative, not used: keep all ties (equivalent to a threshold of 0).
  ## Element ij is the number of posts both users commented on:
  # m_users <- im %*% t(im)      # or tcrossprod(im), supposedly faster
  ## Element ij is the number of users who commented on both posts:
  # m_posts <- t(im) %*% im
  ## Alternative, not used: Jaccard via dist(), which returns the distance
  ## (M10+M01)/(M10+M01+M11), so the similarity is its complement:
  # J <- 1 - as.matrix(dist(im, method = "binary"))

  # sparse-matrix implementation; same values as dist() but much faster
  J <- jaccard(im)

  pairs <- which(J > j_thresh, arr.ind = TRUE)
  # J is symmetric with a diagonal of 1s, so keep each unordered pair once
  # and drop self-pairs rather than cleaning up multi-edges and loops later.
  pairs <- pairs[pairs[, 1] < pairs[, 2], , drop = FALSE]

  # Start from a graph that already has every node, so that nodes without a
  # qualifying tie are kept and vertex ids always line up with the rows.
  g_1_mode <- make_empty_graph(n = nrow(im), directed = FALSE)
  g_1_mode <- add_edges(g_1_mode, as.vector(t(pairs)))

  if (!is.null(rownames(im))) {
    V(g_1_mode)$name <- rownames(im)
  }
  g_1_mode
}
#load the bimodal (2-mode) Facebook graph that was saved earlier as graphml
g <- read_graph(file = "g_bimodal_facebook_csg.graphml", format = "graphml")
#recode the character 'type' attribute as the logical flag igraph uses for bipartite graphs:
#"User" vertices become FALSE, everything else TRUE -> this gives the 1-mode network of users
V(g)$type <- V(g)$type != "User"
#to get the 1-mode network of posts instead, flip the coding:
#V(g)$type <- V(g)$type == "User"
#keep only ties whose Jaccard similarity exceeds 0.4
g_1_mode <- one_mode(g = g, j_thresh = 0.4)