Using netdis with customised binning and expected counts.

Jack Roberts

2020-07-13

Load required libraries

# Load libraries
library("netdist")
library("purrr")

Set Netdis parameters

# Largest graphlet size for which counts and the netdis statistic are computed.
max_graphlet_size <- 4

# Size of the neighbourhood used to build each ego network.
neighbourhood_size <- 2

# Lower bounds on ego network size; smaller ego networks are not considered.
min_ego_edges <- 1
min_ego_nodes <- 3

# Locate the reference graph file shipped with the netdist package and read it
# as an "ncol"-format graph.
ref_path <- system.file("extdata", "random", "ER_1250_10_1",
                        package = "netdist")
ref_graph <- read_simple_graph(ref_path, format = "ncol")

Load query graphs

# Read the query graphs from the "extdata/VRPINS" directory shipped with the
# netdist package.
source_dir <- system.file("extdata", "VRPINS", package = "netdist")
graphs <- read_simple_graphs(source_dir, pattern = "*", format = "ncol")

Default Expected Counts with Reference Graph

# Compute netdis statistics between the query graphs, passing the reference
# graph and the parameters defined above.
results <- netdis_many_to_many(
  graphs,
  ref_graph,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges
)

# Netdis values: one row per graphlet size, one column per comparison.
print(results[["netdis"]])
##              [,1]        [,2]       [,3]        [,4]      [,5]      [,6]
## netdis3 0.1846655 0.008264222 0.01005385 0.006777578 0.2065762 0.2091241
## netdis4 0.1749835 0.165264120 0.01969246 0.159711160 0.2917612 0.2215579
##              [,7]         [,8]         [,9]        [,10]
## netdis3 0.2075471 0.0001335756 0.0001748254 0.0005964448
## netdis4 0.4171614 0.0760242643 0.0343418653 0.1311552411
# Names and indices of the graphs compared in each column.
print(results[["comp_spec"]])
##    name_a name_b index_a index_b
## 1     EBV    ECL       1       2
## 2     EBV  HSV-1       1       3
## 3     EBV   KSHV       1       4
## 4     EBV    VZV       1       5
## 5     ECL  HSV-1       2       3
## 6     ECL   KSHV       2       4
## 7     ECL    VZV       2       5
## 8   HSV-1   KSHV       3       4
## 9   HSV-1    VZV       3       5
## 10   KSHV    VZV       4       5

With Modified Binning Parameters

# Binning function: binned_densities_adaptive with its interval parameters
# pre-set to the values used in this example.
binning_fn <- purrr::partial(
  binned_densities_adaptive,
  min_counts_per_interval = 10,
  num_intervals = 50
)

# Compute netdis statistics with the customised binning function.
results <- netdis_many_to_many(
  graphs,
  ref_graph,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges,
  binning_fn = binning_fn
)

# Netdis values: one row per graphlet size, one column per comparison.
print(results[["netdis"]])
##               [,1]        [,2]        [,3]        [,4]      [,5]      [,6]
## netdis3 0.08499773 0.005900766 0.009547675 0.007177066 0.1078916 0.1144589
## netdis4 0.20037679 0.045244760 0.018904439 0.112043371 0.3361503 0.2631420
##              [,7]         [,8]         [,9]        [,10]
## netdis3 0.1101426 0.0006494388 2.478794e-05 0.0004097632
## netdis4 0.4818139 0.0274434372 3.227187e-02 0.0928126401
# Names and indices of the graphs compared in each column.
print(results[["comp_spec"]])
##    name_a name_b index_a index_b
## 1     EBV    ECL       1       2
## 2     EBV  HSV-1       1       3
## 3     EBV   KSHV       1       4
## 4     EBV    VZV       1       5
## 5     ECL  HSV-1       2       3
## 6     ECL   KSHV       2       4
## 7     ECL    VZV       2       5
## 8   HSV-1   KSHV       3       4
## 9   HSV-1    VZV       3       5
## 10   KSHV    VZV       4       5

With Modified Expected Counts: Geometric Poisson

# Bin counts come from density_binned_counts_gp; expected counts come from
# netdis_expected_counts with scale_fn fixed to NULL.
bin_counts_fn <- density_binned_counts_gp
exp_counts_fn <- purrr::partial(netdis_expected_counts, scale_fn = NULL)

# Compute netdis statistics without a reference graph (ref_graph = NULL),
# using the customised count functions.
results <- netdis_many_to_many(
  graphs,
  ref_graph = NULL,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges,
  bin_counts_fn = bin_counts_fn,
  exp_counts_fn = exp_counts_fn
)

# Netdis values: one row per graphlet size, one column per comparison.
print(results[["netdis"]])
##              [,1]      [,2]      [,3]       [,4]       [,5]       [,6]
## netdis3 0.8822527 0.9101084 0.8838054 0.96266771 0.04173551 0.03585169
## netdis4 0.1892716 0.5735233 0.3719671 0.04604718 0.60270399 0.20370737
##               [,7]         [,8]        [,9]       [,10]
## netdis3 0.06271238 0.0004211575 0.005364888 0.009114229
## netdis4 0.12978637 0.7173089685 0.487688692 0.371848474
# Names and indices of the graphs compared in each column.
print(results[["comp_spec"]])
##    name_a name_b index_a index_b
## 1     EBV    ECL       1       2
## 2     EBV  HSV-1       1       3
## 3     EBV   KSHV       1       4
## 4     EBV    VZV       1       5
## 5     ECL  HSV-1       2       3
## 6     ECL   KSHV       2       4
## 7     ECL    VZV       2       5
## 8   HSV-1   KSHV       3       4
## 9   HSV-1    VZV       3       5
## 10   KSHV    VZV       4       5

With Modified Expected Counts: Simple Mean

# Function choices for this run: single_density_bin for binning, together with
# density_binned_counts and netdis_expected_counts (no arguments pre-set).
exp_counts_fn <- netdis_expected_counts
bin_counts_fn <- density_binned_counts
binning_fn <- single_density_bin

# Compute netdis statistics without a reference graph (ref_graph = NULL),
# using the three functions selected above.
results <- netdis_many_to_many(
  graphs,
  ref_graph = NULL,
  max_graphlet_size = max_graphlet_size,
  neighbourhood_size = neighbourhood_size,
  min_ego_nodes = min_ego_nodes,
  min_ego_edges = min_ego_edges,
  binning_fn = binning_fn,
  bin_counts_fn = bin_counts_fn,
  exp_counts_fn = exp_counts_fn
)

# Netdis values: one row per graphlet size, one column per comparison.
print(results[["netdis"]])
##              [,1]      [,2]      [,3]       [,4]      [,5]      [,6]      [,7]
## netdis3 0.3116860 0.8254261 0.8768637 0.04053921 0.8531485 0.8226894 0.2353732
## netdis4 0.9592365 0.2009423 0.7974697 0.21688688 0.7734930 0.2144558 0.8030030
##               [,8]      [,9]     [,10]
## netdis3 0.01970843 0.8288649 0.9167543
## netdis4 0.39992007 0.3300305 0.6301018
# Names and indices of the graphs compared in each column.
print(results[["comp_spec"]])
##    name_a name_b index_a index_b
## 1     EBV    ECL       1       2
## 2     EBV  HSV-1       1       3
## 3     EBV   KSHV       1       4
## 4     EBV    VZV       1       5
## 5     ECL  HSV-1       2       3
## 6     ECL   KSHV       2       4
## 7     ECL    VZV       2       5
## 8   HSV-1   KSHV       3       4
## 9   HSV-1    VZV       3       5
## 10   KSHV    VZV       4       5