btllib
 All Classes Namespaces Functions Variables
aahash.hpp
1 #ifndef BTLLIB_AAHASH_HPP
2 #define BTLLIB_AAHASH_HPP
3 
4 #include <cstdint>
5 #include <cstring>
6 #include <limits>
7 #include <memory>
8 #include <string>
9 #include <string_view>
10 #include <vector>
11 
12 namespace btllib {
13 
/// Numeric form of one spaced seed: one unsigned value per seed character.
using SpacedSeed = std::vector<unsigned>;

/**
 * Convert textual seeds into their numeric representation.
 *
 * Every character `c` of a seed string is mapped to `c - '0'`, so '0' -> 0,
 * '1' -> 1, and so on. No validation is done here: a character below '0'
 * wraps around to a large unsigned value.
 *
 * @param seeds Seed strings, one string per seed.
 * @return One SpacedSeed per input string, in the same order, each with the
 *         same length as its source string.
 */
inline std::vector<SpacedSeed>
aa_parse_seeds(const std::vector<std::string>& seeds)
{
  std::vector<SpacedSeed> seed_vec;
  seed_vec.reserve(seeds.size());
  for (const auto& seed : seeds) {
    // Build each seed directly inside the result to avoid copying a
    // temporary vector per seed.
    seed_vec.emplace_back();
    SpacedSeed& parsed = seed_vec.back();
    parsed.reserve(seed.size());
    for (const char c : seed) {
      parsed.push_back(static_cast<unsigned>(c - '0'));
    }
  }
  return seed_vec;
}
29 
30 class AAHash;
31 
32 class SeedAAHash;
33 
/**
 * Hashing state over a character sequence.
 *
 * The object stores a non-owning pointer to the sequence, the k-mer size,
 * a "level" value, the current position, and a buffer of `hash_num` 64-bit
 * hash values. The hashing itself (`init()` / `roll()`) is defined outside
 * this block.
 *
 * NOTE(review): the sequence is not copied; the caller presumably has to
 * keep it alive for as long as this object is used -- confirm with callers.
 */
class AAHash
{
  static constexpr const char* HASH_FN_NAME = "aahash1";

private:
  friend class SeedAAHash;

  /// Defined elsewhere; presumably computes the first hash values -- TODO confirm.
  bool init();

  const char* seq;        ///< Start of the (non-owned) sequence.
  size_t seq_len;         ///< Number of characters in `seq`.
  const uint8_t hash_num; ///< Number of entries in `hashes_array`.
  const uint16_t k;       ///< k-mer size.
  unsigned level;         ///< Level value supplied at construction.

  size_t pos;               ///< Current position in the sequence.
  bool initialized = false; ///< State flag, false on construction.
  std::unique_ptr<uint64_t[]> hashes_array; ///< `hash_num` hash values.

public:
  /**
   * Construct the hashing state without computing any hash yet.
   *
   * @param seq      Sequence to hash (only its data pointer and size are kept).
   * @param hash_num Number of hash values held per position.
   * @param k        k-mer size.
   * @param level    Level value stored for later use.
   * @param pos      Starting position in the sequence.
   */
  AAHash(std::string_view seq,
         uint8_t hash_num,
         uint16_t k,
         unsigned level,
         size_t pos = 0)
    : seq(seq.data())
    , seq_len(seq.size())
    , hash_num(hash_num)
    , k(k)
    , level(level)
    , pos(pos)
    // make_unique value-initializes the array, so the buffer holds zeros
    // (not indeterminate values) until the first hash is computed.
    , hashes_array(std::make_unique<uint64_t[]>(hash_num))
  {
  }

  /// Deep copy: the new object gets its own hash buffer with equal contents.
  AAHash(const AAHash& aahash)
    : seq(aahash.seq)
    , seq_len(aahash.seq_len)
    , hash_num(aahash.hash_num)
    , k(aahash.k)
    , level(aahash.level)
    , pos(aahash.pos)
    , initialized(aahash.initialized)
    , hashes_array(std::make_unique<uint64_t[]>(aahash.hash_num))
  {
    std::memcpy(hashes_array.get(),
                aahash.hashes_array.get(),
                hash_num * sizeof(uint64_t));
  }

  AAHash(AAHash&&) = default;

  /// Defined elsewhere; presumably advances to the next k-mer -- TODO confirm.
  bool roll();

  /// Pointer to the `get_hash_num()` hash values.
  const uint64_t* hashes() const { return hashes_array.get(); }
  /// Current position in the sequence.
  size_t get_pos() const { return pos; }
  /// Number of hash values in the buffer.
  unsigned get_hash_num() const { return hash_num; }
  /// k-mer size.
  unsigned get_k() const { return k; }
  /// First entry of the hash buffer.
  uint64_t get_forward_hash() const { return hashes_array[0]; }
  /// Level value given at construction.
  unsigned get_level() const { return level; }
  /// Pointer to the start of the sequence.
  const char* get_seq() const { return seq; }
};
118 
120 {
121 private:
122  AAHash aahash;
123  const unsigned hash_num_per_seed;
124  std::unique_ptr<uint64_t[]> hashes_array;
125  std::vector<SpacedSeed> seeds;
126  bool verify_seed();
128  void init();
129 
130 public:
142  SeedAAHash(const char* seq,
143  const std::vector<SpacedSeed>& seeds,
144  unsigned hash_num_per_seed,
145  unsigned k,
146  size_t pos = 0)
147  : aahash(seq, 1, k, 1, pos)
148  , hash_num_per_seed(hash_num_per_seed)
149  , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
150  , seeds(seeds)
151  {
152  init();
153  }
154  SeedAAHash(const std::string& seq,
155  const std::vector<SpacedSeed>& seeds,
156  unsigned hash_num_per_seed,
157  unsigned k,
158  size_t pos = 0)
159  : aahash(seq, 1, k, 1, pos)
160  , hash_num_per_seed(hash_num_per_seed)
161  , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
162  , seeds(seeds)
163  {
164  init();
165  }
166  SeedAAHash(const char* seq,
167  const std::vector<std::string>& seeds,
168  unsigned hash_num_per_seed,
169  unsigned k,
170  size_t pos = 0)
171  : aahash(seq, 1, k, 1, pos)
172  , hash_num_per_seed(hash_num_per_seed)
173  , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
174  , seeds(aa_parse_seeds(seeds))
175  {
176  init();
177  }
178  SeedAAHash(const std::string& seq,
179  const std::vector<std::string>& seeds,
180  unsigned hash_num_per_seed,
181  unsigned k,
182  size_t pos = 0)
183  : aahash(seq, 1, k, 1, pos)
184  , hash_num_per_seed(hash_num_per_seed)
185  , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
186  , seeds(aa_parse_seeds(seeds))
187  {
188  init();
189  }
190 
191  SeedAAHash(const SeedAAHash& seed_aahash)
192  : aahash(seed_aahash.aahash)
193  , hash_num_per_seed(seed_aahash.hash_num_per_seed)
194  , hashes_array(new uint64_t[hash_num_per_seed * seed_aahash.seeds.size()])
195  , seeds(seed_aahash.seeds)
196  {
197  std::memcpy(hashes_array.get(),
198  seed_aahash.hashes_array.get(),
199  hash_num_per_seed * seeds.size() * sizeof(uint64_t));
200  }
201  SeedAAHash(SeedAAHash&&) = default;
202 
216  bool roll();
217 
218  const uint64_t* hashes() const { return hashes_array.get(); }
219 
220  size_t get_pos() const { return aahash.get_pos(); }
221  unsigned get_hash_num() const { return aahash.get_hash_num(); }
222  unsigned get_hash_num_per_seed() const { return hash_num_per_seed; }
223  unsigned get_k() const { return aahash.get_k(); }
224 };
225 
226 } // namespace btllib
227 
228 #endif
AAHash(std::string_view seq, uint8_t hash_num, uint16_t k, unsigned level, size_t pos=0)
Definition: aahash.hpp:63
Definition: aahash.hpp:119
SeedAAHash(const char *seq, const std::vector< SpacedSeed > &seeds, unsigned hash_num_per_seed, unsigned k, size_t pos=0)
Definition: aahash.hpp:142
Definition: aahash.hpp:34