librsync  2.3.3
rabinkarp.c
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 *
3 * rabinkarp -- The RabinKarp rolling checksum.
4 *
5 * Copyright (C) 2019 by Donovan Baarda <abo@minkirri.apana.org.au>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21#include "rabinkarp.h"
22
23/* Constant for RABINKARP_MULT^2. */
24#define RABINKARP_MULT2 0xa5b71959U
25
26/* Macros for doing 16 bytes with 2 mults that can be done in parallel. Testing
27 showed this as a performance sweet spot vs 16x1, 8x2, 4x4 1x16 alternative
28 arrangements. */
29#define PAR2X1(hash,buf,i) (RABINKARP_MULT2*(hash) + \
30 RABINKARP_MULT*buf[i] + \
31 buf[i+1])
32#define PAR2X2(hash,buf,i) PAR2X1(PAR2X1(hash,buf,i),buf,i+2)
33#define PAR2X4(hash,buf,i) PAR2X2(PAR2X2(hash,buf,i),buf,i+4)
34#define PAR2X8(hash,buf) PAR2X4(PAR2X4(hash,buf,0),buf,8)
35
36/* Table of RABINKARP_MULT^(2^(i+1)) for power lookups. */
37const static uint32_t RABINKARP_MULT_POW2[32] = {
38 0x08104225U,
39 0xa5b71959U,
40 0xf9c080f1U,
41 0x7c71e2e1U,
42 0x0bb409c1U,
43 0x4dc72381U,
44 0xd17a8701U,
45 0x96260e01U,
46 0x55101c01U,
47 0x2d303801U,
48 0x66a07001U,
49 0xfe40e001U,
50 0xc081c001U,
51 0x91038001U,
52 0x62070001U,
53 0xc40e0001U,
54 0x881c0001U,
55 0x10380001U,
56 0x20700001U,
57 0x40e00001U,
58 0x81c00001U,
59 0x03800001U,
60 0x07000001U,
61 0x0e000001U,
62 0x1c000001U,
63 0x38000001U,
64 0x70000001U,
65 0xe0000001U,
66 0xc0000001U,
67 0x80000001U,
68 0x00000001U,
69 0x00000001U
70};
71
72/* Get the value of RABINKARP_MULT^n. */
73static inline uint32_t rabinkarp_pow(uint32_t n)
74{
75 const uint32_t *m = RABINKARP_MULT_POW2;
76 uint32_t ans = 1;
77 while (n) {
78 if (n & 1) {
79 ans *= *m;
80 }
81 m++;
82 n >>= 1;
83 }
84 return ans;
85}
86
87void rabinkarp_update(rabinkarp_t *sum, const unsigned char *buf, size_t len)
88{
89 size_t n = len;
90 uint32_t hash = sum->hash;
91
92 while (n >= 16) {
93 hash = PAR2X8(hash, buf);
94 buf += 16;
95 n -= 16;
96 }
97 while (n) {
98 hash = RABINKARP_MULT * hash + *buf++;
99 n--;
100 }
101 sum->hash = hash;
102 sum->count += len;
103 sum->mult *= rabinkarp_pow((uint32_t)len);
104}
The rabinkarp class implementation of the RabinKarp rollsum.
#define RABINKARP_MULT
The RabinKarp multiplier.
Definition: rabinkarp.h:41
The rabinkarp_t state type.
Definition: rabinkarp.h:56
uint32_t mult
The value of RABINKARP_MULT^count.
Definition: rabinkarp.h:59
size_t count
Count of bytes included in sum.
Definition: rabinkarp.h:57
uint32_t hash
The accumulated hash value.
Definition: rabinkarp.h:58