## Dynamic network analysis
## ACSPRI - Big Data for Social Scientists 2015
## Author: Tim Graham
## In this dataset we will be looking at users commenting over time on posts
## on the official Star Wars Facebook Page. Note: there are no
## 'likes' data because the API does not provide timestamp data for likes.
#install.packages("networkDynamic")
#library(networkDynamic)
#library(SocialMediaLab)
# Load the dynamic bimodal (user-post) network and the temporal comment data
g_facebook_dynamic <- read.graph("Jul_02_21_01_53_2015_AEST_FacebookDynamicBimodalNetwork.graphml", format = "graphml")
myTemporalFacebookData <- read.csv("facebook_temporal_data_starwars.csv", stringsAsFactors = FALSE)
# Inspect the span of the edge timestamps (seconds since collection start)
edgeTimes <- E(g_facebook_dynamic)$timestampNumeric
print(range(edgeTimes))
# Sanity check against what we know: the data were collected from Facebook
# posts published between 01/06/2015 and 01/07/2015, so the largest
# timestamp expressed in days should be roughly one month
print(max(edgeTimes) / (60 * 60 * 24))
# Descriptive analysis directly from the data frame (myTemporalFacebookData):
# plot the comment timestamps over the collection period
plot(as.Date(myTemporalFacebookData$commentTimestamp))
# Bin comments into 12-hour chunks to compare AM versus PM activity.
# Chunk i covers [chunks12Hours[i], chunks12Hours[i + 1]); the final chunk
# extends to the end of the data.
chunks12Hours <- seq(1, max(E(g_facebook_dynamic)$timestampNumeric), (60 * 60 * 12))
# findInterval() assigns each comment to its chunk (timestamps below the first
# boundary get index 0 and are silently dropped by tabulate()); tabulate()
# then counts occurrences per chunk, returning a frequency for every chunk.
# BUG FIX: the original loop indexed chunks12Hours[i + 1] one element past the
# end of the vector on its last iteration, so the comparison produced NA and
# the final 12-hour chunk always had a frequency of 0.
chunks12HoursFreqs <- tabulate(
  findInterval(myTemporalFacebookData$commentTimestampConverted, chunks12Hours),
  nbins = length(chunks12Hours)
)
## NETWORK-based representation
# Collapsing a multi-graph to a weighted graph discards all time information.
# A compromise is to create 'slices' of the multi-graph and collapse within
# each slice. For example, take the first 7 days (604800 seconds) of the data:
# build a subgraph containing only the first week of activity.
secondsPerWeek <- 60 * 60 * 24 * 7  # 604800
g_facebook_dynamic_week1 <- subgraph.edges(
  g_facebook_dynamic,
  E(g_facebook_dynamic)[timestampNumeric <= secondsPerWeek]
)
# Print a summary of the week-1 subgraph
g_facebook_dynamic_week1
# Separate week 1 into 14 slices of 43200 seconds (12 hours) each
sliceSeconds <- 43200
g.sl14 <- lapply(seq_len(14), function(sliceIdx) {
  lowerBound <- sliceSeconds * (sliceIdx - 1)
  upperBound <- sliceSeconds * sliceIdx
  sliceEdges <- E(g_facebook_dynamic_week1)[timestampNumeric > lowerBound &
                                              timestampNumeric <= upperBound]
  sliceGraph <- subgraph.edges(g_facebook_dynamic_week1, sliceEdges,
                               delete.vertices = TRUE)
  # Drop self-loops and collapse multi-edges within the slice
  simplify(sliceGraph)
})
# The resulting 14 subnetworks are stored as a list of graphs. Because we used
# delete.vertices = TRUE, each slice keeps only the vertices incident to its
# own edges rather than the full set of 3791 users/posts.
sapply(g.sl14, vcount)
sapply(g.sl14, ecount)
# Write each slice's network plot to a PNG file.
# seq_along() is safe even if g.sl14 were empty (1:length() would yield 1:0
# and run the loop body twice with bad indices).
for (i in seq_along(g.sl14)) {
  png(paste0("dynamicNet_plot_", i, "_.png"), width = 800, height = 700)
  # Guarantee the graphics device is closed even if plot() errors, so a
  # failure mid-loop does not leave a dangling open device.
  tryCatch(
    plot(g.sl14[[i]], vertex.shape = "none", edge.width = 1.5,
         edge.curved = .5, edge.arrow.size = 0.5, asp = 9 / 16,
         margin = -0.15),
    finally = dev.off()
  )
}