library(fastrtext)
# Load the example sentence datasets used in this walkthrough.
data("train_sentences")
data("test_sentences")

# Lower-case the training text and dump it, one sentence per line, into a
# temporary file that the trainer reads as its input corpus.
texts <- tolower(train_sentences[, "text"])
tmp_file_txt <- tempfile()
tmp_file_model <- tempfile()
writeLines(text = texts, con = tmp_file_txt)

# Train a skip-gram model; the arguments mirror the fastText command line.
execute(
  commands = c(
    "skipgram",
    "-input", tmp_file_txt,
    "-output", tmp_file_model,
    "-verbose", 1
  )
)
##
## Read 0M words
## Number of words: 2061
## Number of labels: 0
##
## Progress: 100.0% words/sec/thread: 18695 lr: 0.000000 loss: 2.759574 eta: 0h0m
# Load the trained model. The trainer was given an output path without an
# extension, so load_model() appends ".bin" itself (see the message below).
model <- load_model(tmp_file_model)
## add .bin extension to the path
# test word extraction
dict <- get_dictionary(model)
print(head(dict, 5))
## [1] "the" "</s>" "of" "to" "and"
# print the embedding vectors of two related words (one row per word)
print(get_word_vectors(model, c("time", "timing")))
## [,1] [,2] [,3] [,4] [,5]
## time 0.1167097 -0.03827564 -0.02296746 0.1365289 0.0008005218
## timing 0.1591842 -0.09134333 -0.06769126 0.2078175 -0.0182375852
## [,6] [,7] [,8] [,9] [,10]
## time -0.1962421 -0.04538493 -0.1390844 -0.007558791 -0.1196225
## timing -0.2581849 -0.02855971 -0.1561155 0.005860637 -0.1519582
## [,11] [,12] [,13] [,14] [,15] [,16]
## time -0.2134209 0.03496689 0.03503628 -0.05943954 0.2268901 0.3177874
## timing -0.2634669 0.04080373 0.13220903 -0.03016823 0.2526892 0.2700602
## [,17] [,18] [,19] [,20] [,21] [,22]
## time 0.003870847 -0.14522921 -0.3423738 0.1725973 0.1253057 -0.2345764
## timing 0.075717948 -0.09777938 -0.3666124 0.1444575 0.1103414 -0.2548696
## [,23] [,24] [,25] [,26] [,27] [,28]
## time -0.1149856 0.04189489 -0.2064881 -0.04861909 0.2042121 -0.3838948
## timing -0.1047497 0.05796037 -0.1998294 -0.12975578 0.1627148 -0.4275751
## [,29] [,30] [,31] [,32] [,33] [,34]
## time 0.1742465 3.704954e-03 -0.001782787 0.1762601 -0.1346901 0.2329622
## timing 0.2011224 -2.773263e-05 -0.002174591 0.1241325 -0.1744607 0.2728870
## [,35] [,36] [,37] [,38] [,39] [,40]
## time 0.1986251 -0.2037776 -0.2694438 -0.1713736 -0.2674021 -0.3143962
## timing 0.1046256 -0.2029465 -0.2732884 -0.1237588 -0.2517623 -0.3545349
## [,41] [,42] [,43] [,44] [,45]
## time -0.08683745 0.01528083 0.2617419 -0.07486662 0.02389679
## timing -0.08111207 0.10806892 0.2282779 -0.06182692 -0.01142024
## [,46] [,47] [,48] [,49] [,50]
## time -0.11488790 -0.004201524 0.09214680 -0.2521136 -0.016503636
## timing -0.07636449 -0.017273193 0.03993015 -0.2802216 -0.009124223
## [,51] [,52] [,53] [,54] [,55] [,56]
## time -0.03357568 0.02165925 0.1650185 0.3534191 -0.1713252 -0.08307429
## timing -0.03858487 0.05281796 0.2244153 0.3142212 -0.1723958 -0.03110762
## [,57] [,58] [,59] [,60] [,61] [,62]
## time 0.01376791 -0.2480264 0.01402754 0.07804823 -0.09069949 -0.2537512
## timing 0.03472624 -0.1794501 -0.01637903 0.06431359 -0.07272011 -0.2643290
## [,63] [,64] [,65] [,66] [,67] [,68]
## time -0.03725076 0.10802998 -0.00367105 -0.2886443 -0.1725299 -0.1496135
## timing -0.04355646 0.04266159 -0.04564099 -0.3132562 -0.2363685 -0.1228809
## [,69] [,70] [,71] [,72] [,73]
## time 0.09906127 -0.03466756 0.01421534 0.2391183 -0.016443942
## timing 0.07060840 -0.07558307 0.03125404 0.2336826 0.003825146
## [,74] [,75] [,76] [,77] [,78] [,79]
## time -0.10634467 0.03111650 -0.03947933 0.1586131 0.1159388 -0.1608489
## timing -0.05555288 0.02813843 -0.02189661 0.1897576 0.0536274 -0.2191714
## [,80] [,81] [,82] [,83] [,84] [,85]
## time 0.13119103 -0.13141848 0.09615964 -0.1803136 0.04562496 0.01560315
## timing 0.08859968 -0.04089915 0.06382442 -0.2135662 0.05951345 0.01534841
## [,86] [,87] [,88] [,89] [,90] [,91]
## time -0.04462345 0.03154065 0.1247594 -0.2039463 -0.1595443 0.2324333
## timing -0.06802195 0.02304716 0.1272477 -0.1697713 -0.1385920 0.2666971
## [,92] [,93] [,94] [,95] [,96] [,97]
## time 0.1039973 0.1870850 -0.10733225 -0.3611160 -0.1570095 -0.1877101
## timing 0.1064991 0.1289596 -0.05836376 -0.3534068 -0.1078936 -0.1590668
## [,98] [,99] [,100]
## time -0.1019809 -0.01484638 0.06689427
## timing -0.1040684 0.03702667 0.09527219
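# NOTE(review): the call that printed the value below is missing from this
# file -- presumably get_word_distance(model, "time", "timing"); confirm.
## [,1]
## [1,] 0.0303869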
# free memory
# tempfile() only reserves a name: the trainer writes the model to
# "<output>.bin" and "<output>.vec", so those files must be removed too.
unlink(tmp_file_txt)
unlink(c(tmp_file_model, paste0(tmp_file_model, c(".bin", ".vec"))))
# Drop the model object and trigger a garbage collection.
rm(model)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 524984 28.1 940480 50.3 750400 40.1
## Vcells 1035933 8.0 2060183 15.8 1378408 10.6