FastqPuri
bloom.h
Go to the documentation of this file.
1 /****************************************************************************
2  * Copyright (C) 2017 by Paula Perez Rubio *
3  * *
4  * This file is part of FastqPuri. *
5  * *
6  * FastqPuri is free software: you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License as *
8  * published by the Free Software Foundation, either version 3 of the *
9  * License, or (at your option) any later version. *
10  * *
11  * FastqPuri is distributed in the hope that it will be useful, *
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14  * GNU General Public License for more details. *
15  * *
16  * You should have received a copy of the GNU General Public License *
17  * along with FastqPuri. *
18  * If not, see <http://www.gnu.org/licenses/>. *
19  ****************************************************************************/
20 
29 #ifndef BLOOM_MAKER_H_
30 #define BLOOM_MAKER_H_
31 
32 #include "city.h"
33 #include "fa_read.h"
34 #include "defines.h"
35 
36 
/**
 * Bloom filter structure.
 *
 * NOTE(review): this listing jumps from line 43 to line 45; the member index
 * of this page places `int kmersizeBytes` at bloom.h:44, so that member is
 * not visible here. Field notes marked "presumably" are inferred from the
 * names only and should be confirmed against bloom.c.
 */
41 typedef struct _bfilter {
42  int kmersize; /* presumably the k-mer length - TODO confirm */
43  int hashNum; /* number of hash functions (see init_Bfkmer / multiHash) */
45  double falsePosRate; /* presumably the target false positive rate - TODO confirm */
46  uint64_t bfsizeBits; /* presumably the filter size in bits - TODO confirm */
47  uint64_t bfsizeBytes; /* presumably the filter size in bytes - TODO confirm */
48  uint64_t nelem; /* presumably the number of elements - TODO confirm */
49  unsigned char *filter; /* presumably the filter's bit array - TODO confirm */
50 } Bfilter;
51 
52 
/**
 * Stores a processed k-mer (2 bits per nucleotide).
 *
 * NOTE(review): this listing jumps from line 59 to line 65; the member index
 * of this page places `int kmersizeBytes` (bloom.h:60), `int halfsizeBytes`
 * (bloom.h:61), `int hangingBases` (bloom.h:63) and `int hasOverhead`
 * (bloom.h:64) in that gap, so those members are not visible here.
 */
57 typedef struct _bfkmer {
58  int kmersize; /* presumably the k-mer length - TODO confirm */
59  int hashNum; /* number of hash functions (see init_Bfkmer) */
65  unsigned char *compact; /* presumably the compacted k-mer written by compact_kmer - TODO confirm */
66  uint64_t *hashValues; /* presumably the hashNum hash values obtained by multiHash - TODO confirm */
67 } Bfkmer;
68 
/**
 * Look-up table initialization (defined in bloom.c).
 *
 * NOTE(review): the empty parameter list `()` is not a prototype before C23;
 * `(void)` would state the zero-argument contract explicitly.
 */
69 void init_LUTs();
70 
/**
 * Initialization of a Bfilter structure (defined in bloom.c).
 *
 * The parameter names match the Bfilter members of the same name.
 * NOTE(review): presumably the returned filter is released with
 * free_Bfilter - confirm against bloom.c.
 */
71 Bfilter *init_Bfilter(int kmersize, uint64_t bfsizeBits, int hashNum,
72  double falsePosRate, uint64_t nelem);
73 
/**
 * Initializes a Bfkmer structure, given the kmersize and the number of hash
 * functions (defined in bloom.c).
 *
 * NOTE(review): presumably the result is released with free_Bfkmer - confirm
 * against bloom.c.
 */
74 Bfkmer *init_Bfkmer(int kmersize, int hashNum);
75 
/** Frees Bfilter memory (defined in bloom.c). */
76 void free_Bfilter(Bfilter *ptr_bf);
77 
/** Frees a Bfkmer (defined in bloom.c). */
78 void free_Bfkmer(Bfkmer *ptr_bfkmer);
79 
/**
 * Compactifies a k-mer for insertion in the Bloom filter (defined in bloom.c).
 *
 * NOTE(review): presumably `position` is the offset of the k-mer inside
 * `sequence` and the result is written into `ptr_bfkmer`; the meaning of the
 * int return value is not visible from this header - confirm against bloom.c.
 */
80 int compact_kmer(const unsigned char *sequence, uint64_t position,
81  Bfkmer *ptr_bfkmer);
82 
/** Obtains the hashNum hash values for a compactified k-mer (defined in bloom.c). */
83 void multiHash(Bfkmer* ptr_bfkmer);
84 
/**
 * Inserts the hash values of a k-mer in the filter (defined in bloom.c).
 *
 * NOTE(review): the meaning of the bool return value is not visible from this
 * header - confirm against bloom.c. The first parameter is spelled `pr_bf`
 * here, whereas the other declarations in this header use `ptr_bf`.
 */
85 bool insert_and_fetch(Bfilter *pr_bf, Bfkmer* ptr_bfkmer);
86 
/** Checks if a k-mer is contained in the filter (defined in bloom.c). */
87 bool contains(Bfilter *ptr_bf, Bfkmer* ptr_bfkmer);
88 
/**
 * Creates a Bloom filter from a fasta structure (defined in bloom.c).
 *
 * Apart from `ptr_fasta`, the parameters have the same names and types as
 * those of init_Bfilter.
 */
89 Bfilter *create_Bfilter(Fa_data *ptr_fasta, int kmersize, uint64_t bfsizeBits,
90  int hashNum, double falsePosRate, uint64_t nelem);
91 
/**
 * Saves a Bloom filter to disk (defined in bloom.c).
 *
 * NOTE(review): presumably `filterfile` and `paramfile` are output paths for
 * the filter data and its parameters respectively - confirm against bloom.c.
 */
92 void save_Bfilter(Bfilter *ptr_bf, char *filterfile, char *paramfile);
93 
/**
 * Reads a Bloom filter from a file (defined in bloom.c).
 *
 * NOTE(review): takes the same `filterfile` / `paramfile` pair as
 * save_Bfilter; presumably it reads back what that function wrote - confirm
 * against bloom.c.
 */
94 Bfilter *read_Bfilter(char *filterfile, char *paramfile);
95 
96 
97 #endif // endif BLOOM_MAKER_H_
void free_Bfilter(Bfilter *ptr_bf)
free Bfilter memory
Definition: bloom.c:143
void save_Bfilter(Bfilter *ptr_bf, char *filterfile, char *paramfile)
saves a bloomfilter to disk
Definition: bloom.c:544
Functions for hashing strings; C translation of CityHash (C++, Google)
Bfilter * init_Bfilter(int kmersize, uint64_t bfsizeBits, int hashNum, double falsePosRate, uint64_t nelem)
initialization of a Bfilter structure
Definition: bloom.c:109
stores sequences of a fasta file
Definition: fa_read.h:46
struct _bfkmer Bfkmer
stores a processed kmer (2 bits per nucleotide)
unsigned char * filter
Definition: bloom.h:49
reads in and stores fasta files
int hashNum
Definition: bloom.h:59
Bfkmer * init_Bfkmer(int kmersize, int hashNum)
initializes a Bfkmer structure, given the kmersize and the number of hash functions ...
Definition: bloom.c:159
uint64_t * hashValues
Definition: bloom.h:66
int halfsizeBytes
Definition: bloom.h:61
void free_Bfkmer(Bfkmer *ptr_bfkmer)
free Bfkmer
Definition: bloom.c:183
Macro definitions.
int kmersizeBytes
Definition: bloom.h:60
int hashNum
Definition: bloom.h:43
Bloom filter structure.
Definition: bloom.h:41
bool contains(Bfilter *ptr_bf, Bfkmer *ptr_bfkmer)
check if kmer is contained in the filter
Definition: bloom.c:477
int hangingBases
Definition: bloom.h:63
int compact_kmer(const unsigned char *sequence, uint64_t position, Bfkmer *ptr_bfkmer)
compactifies a kmer for insertion in the bloomfilter
Definition: bloom.c:224
unsigned char * compact
Definition: bloom.h:65
struct _bfilter Bfilter
Bloom filter structure.
int kmersizeBytes
Definition: bloom.h:44
Bfilter * create_Bfilter(Fa_data *ptr_fasta, int kmersize, uint64_t bfsizeBits, int hashNum, double falsePosRate, uint64_t nelem)
creates a bloom filter from a fasta structure.
Definition: bloom.c:501
int hasOverhead
Definition: bloom.h:64
uint64_t bfsizeBits
Definition: bloom.h:46
stores a processed kmer (2 bits per nucleotide)
Definition: bloom.h:57
void init_LUTs()
look up table initialization
Definition: bloom.c:66
void multiHash(Bfkmer *ptr_bfkmer)
obtains the hashNum hashvalues for a compactified kmer
Definition: bloom.c:435
Bfilter * read_Bfilter(char *filterfile, char *paramfile)
reads a bloom filter from a file
Definition: bloom.c:587
int kmersize
Definition: bloom.h:42
double falsePosRate
Definition: bloom.h:45
int kmersize
Definition: bloom.h:58
bool insert_and_fetch(Bfilter *pr_bf, Bfkmer *ptr_bfkmer)
inserts the hashvalues of a kmer in filter
Definition: bloom.c:457
uint64_t nelem
Definition: bloom.h:48
uint64_t bfsizeBytes
Definition: bloom.h:47