librsync  2.3.0
rabinkarp.h
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * rabinkarp -- The RabinKarp rolling checksum.
4  *
5  * Copyright (C) 2019 by Donovan Baarda <abo@minkirri.apana.org.au>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 #ifndef _RABINKARP_H_
22 # define _RABINKARP_H_
23 
24 # include <stddef.h>
25 # include <stdint.h>
26 
27 /** The RabinKarp seed value.
28  *
29  * The seed ensures different length zero blocks have different hashes. It
30  * effectively encodes the length into the hash. */
31 # define RABINKARP_SEED 1
32 
33 /** The RabinKarp multiplier.
34  *
35  * This multiplier has a bit pattern of 1's getting sparser with significance,
36  * is the product of 2 large primes, and matches the characterstics for a good
37  * LCG multiplier. */
38 # define RABINKARP_MULT 0x08104225
39 
40 /** The RabinKarp inverse multiplier.
41  *
42  * This is the inverse of RABINKARP_MULT modular 2^32. Multiplying by this is
43  * equivalent to dividing by RABINKARP_MULT. */
44 # define RABINKARP_INVM 0x98f009ad
45 
46 /** The RabinKarp seed adjustment.
47  *
48  * This is a factor used to adjust for the seed when rolling out values. It's
49  * equal to; (RABINKARP_MULT - 1) * RABINKARP_SEED */
50 # define RABINKARP_ADJ 0x08104224
51 
52 /** The rabinkarp_t state type. */
53 typedef struct _rabinkarp {
54  size_t count; /**< Count of bytes included in sum. */
55  uint32_t hash; /**< The accumulated hash value. */
56  uint32_t mult; /**< The value of RABINKARP_MULT^count. */
57 } rabinkarp_t;
58 
59 static inline uint32_t uint32_pow(uint32_t m, size_t p)
60 {
61  uint32_t ans = 1;
62  while (p) {
63  if (p & 1) {
64  ans *= m;
65  }
66  m *= m;
67  p >>= 1;
68  }
69  return ans;
70 }
71 
72 static inline void rabinkarp_init(rabinkarp_t *sum)
73 {
74  sum->count = 0;
75  sum->hash = RABINKARP_SEED;
76  sum->mult = 1;
77 }
78 
79 static inline void rabinkarp_update(rabinkarp_t *sum, const unsigned char *buf,
80  size_t len)
81 {
82  for (size_t i = len; i; i--) {
83  sum->hash = sum->hash * RABINKARP_MULT + *buf++;
84  }
85  sum->count += len;
86  sum->mult *= uint32_pow(RABINKARP_MULT, len);
87 }
88 
89 static inline void rabinkarp_rotate(rabinkarp_t *sum, unsigned char out,
90  unsigned char in)
91 {
92  sum->hash =
93  sum->hash * RABINKARP_MULT + in - sum->mult * (out + RABINKARP_ADJ);
94 }
95 
96 static inline void rabinkarp_rollin(rabinkarp_t *sum, unsigned char in)
97 {
98  sum->hash = sum->hash * RABINKARP_MULT + in;
99  sum->count++;
100  sum->mult *= RABINKARP_MULT;
101 }
102 
103 static inline void rabinkarp_rollout(rabinkarp_t *sum, unsigned char out)
104 {
105  sum->count--;
106  sum->mult *= RABINKARP_INVM;
107  sum->hash -= sum->mult * (out + RABINKARP_ADJ);
108 }
109 
110 static inline uint32_t rabinkarp_digest(rabinkarp_t *sum)
111 {
112  return sum->hash;
113 }
114 
115 #endif /* _RABINKARP_H_ */