Using the netdis interfaces for different pairwise comparison options: one-to-one, one-to-many and many-to-many.

Jack Roberts

2020-07-13

Load required libraries

# Load libraries
library("netdist")
library("purrr")

Set Netdis parameters

# Maximum graphlet size to calculate counts and netdis statistic for.
# Passed as `max_graphlet_size` to every netdis_* call in this document.
max_graphlet_size <- 4

# Ego network neighbourhood size
# Passed as `neighbourhood_size` to every netdis_* call in this document.
# NOTE(review): presumably the number of steps from the central node used to
# build each ego network -- confirm against the netdist documentation.
neighbourhood_size <- 2

# Minimum size of ego networks to consider
# (one threshold on the node count and one on the edge count; passed as
# `min_ego_nodes` / `min_ego_edges` to every netdis_* call in this document).
min_ego_nodes <- 3
min_ego_edges <- 1

# Ego network density binning parameters
# NOTE(review): neither value is passed to any netdis_* call visible in this
# document, so changing them here has no visible effect on the results below.
# Presumably the calls fall back on the package's default binning -- confirm,
# and either wire these in or drop them.
min_bin_count <- 5
num_bins <- 100

# Reference graph: locate the example file bundled with netdist under
# extdata/random and read it as an "ncol"-format graph. The resulting
# `ref_graph` is passed to the netdis_* calls in this document.
ref_path <- system.file("extdata", "random", "ER_1250_10_1",
                        package = "netdist")
ref_graph <- read_simple_graph(ref_path, format = "ncol")

Compare two graphs

# Directory of example query graphs shipped with netdist
source_dir <- system.file("extdata", "VRPINS", package = "netdist")

# Read the two query graphs to compare (both files are in "ncol" format)
graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"), format = "ncol")
graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"), format = "ncol")

# Netdis statistics for this single pair, computed against ref_graph with the
# parameters set above. The value is not assigned, so it is auto-printed.
netdis_one_to_one(
  graph_1, graph_2, ref_graph,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges
)
##   netdis3   netdis4 
## 0.1846655 0.1749835

Compare one graph to many other graphs

# Read every graph file found in source_dir into a list indexed by name
graphs <- read_simple_graphs(source_dir, format = "ncol", pattern = "*")

# The query graph, and the set of graphs it is compared against
graph_1 <- graphs[["EBV"]]
graphs_compare <- graphs[c("ECL", "HSV-1", "KSHV", "VZV")]

# Netdis statistics between graph_1 and each graph in graphs_compare,
# computed against ref_graph. The value is not assigned, so it is
# auto-printed.
netdis_one_to_many(
  graph_1, graphs_compare, ref_graph,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges
)
##               ECL       HSV-1       KSHV         VZV
## netdis3 0.1846655 0.008264222 0.01005385 0.006777578
## netdis4 0.1749835 0.165264120 0.01969246 0.159711160

Do pairwise netdis calculations for many graphs

# Locate the example query graphs shipped with netdist and read them all
source_dir <- system.file("extdata", "VRPINS", package = "netdist")
graphs <- read_simple_graphs(source_dir, format = "ncol", pattern = "*")

# Netdis statistics for the pairwise comparisons among `graphs`, computed
# against ref_graph with the parameters set above
results <- netdis_many_to_many(
  graphs, ref_graph,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges
)

# Statistics: one column per comparison
print(results[["netdis"]])
##              [,1]        [,2]       [,3]        [,4]      [,5]      [,6]
## netdis3 0.1846655 0.008264222 0.01005385 0.006777578 0.2065762 0.2091241
## netdis4 0.1749835 0.165264120 0.01969246 0.159711160 0.2917612 0.2215579
##              [,7]         [,8]         [,9]        [,10]
## netdis3 0.2075471 0.0001335756 0.0001748254 0.0005964448
## netdis4 0.4171614 0.0760242643 0.0343418653 0.1311552411
# Comparison specification: the names and indices of the graphs in each pair
print(results[["comp_spec"]])
##    name_a name_b index_a index_b
## 1     EBV    ECL       1       2
## 2     EBV  HSV-1       1       3
## 3     EBV   KSHV       1       4
## 4     EBV    VZV       1       5
## 5     ECL  HSV-1       2       3
## 6     ECL   KSHV       2       4
## 7     ECL    VZV       2       5
## 8   HSV-1   KSHV       3       4
## 9   HSV-1    VZV       3       5
## 10   KSHV    VZV       4       5