diff options
| author | Sam Nystrom <sam@samnystrom.dev> | 2024-01-15 13:35:27 -0500 |
|---|---|---|
| committer | Sam Nystrom <sam@samnystrom.dev> | 2024-01-15 13:35:27 -0500 |
| commit | 2bdd00aa69b901e5230c9b8c24727011626ebeaa (patch) | |
| tree | 27967a3ccc64ac477cb0336f4e61282e8ab832ff | |
| -rw-r--r-- | .gitignore | 5 | ||||
| -rw-r--r-- | COPYING | 14 | ||||
| -rw-r--r-- | Makefile | 45 | ||||
| -rw-r--r-- | README.md | 13 | ||||
| -rw-r--r-- | cdb.5 | 53 | ||||
| -rw-r--r-- | cdbdump.1 | 39 | ||||
| -rw-r--r-- | cdbdump.c | 42 | ||||
| -rw-r--r-- | cdbget.1 | 48 | ||||
| -rw-r--r-- | cdbget.c | 36 | ||||
| -rw-r--r-- | cdbmake.1 | 43 | ||||
| -rw-r--r-- | cdbmake.c | 126 |
11 files changed, 464 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..97a3ce2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +cdbget +cdbdump +cdbmake +tags +core @@ -0,0 +1,14 @@ +Copyright (c) 2024 Sam Nystrom <sam@samnystrom.dev> + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..30472c8 --- /dev/null +++ b/Makefile @@ -0,0 +1,45 @@ +.POSIX: +.SUFFIXES: + +CFLAGS = -Os -Wall -Wextra -pedantic +LDFLAGS = -static +LIBS = -lskarnet + +PREFIX = /usr/local +BINDIR = $(PREFIX)/bin +SHAREDIR = $(PREFIX)/share +MANDIR = $(SHAREDIR)/man + +all: cdbget cdbdump cdbmake + +cdbget: cdbget.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LIBS) + +cdbdump: cdbdump.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LIBS) + +cdbmake: cdbmake.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LIBS) + +clean: + rm -f cdbget cdbdump cdbmake + +install: + install -Dm755 cdbget $(BINDIR)/cdbget + install -Dm755 cdbdump $(BINDIR)/cdbdump + install -Dm755 cdbmake $(BINDIR)/cdbmake + install -Dm644 cdb.5 $(MANDIR)/man5/cdb.5 + install -Dm644 cdbget.1 $(MANDIR)/man1/cdbget.1 + install -Dm644 cdbdump.1 $(MANDIR)/man1/cdbdump.1 + install -Dm644 cdbmake.1 $(MANDIR)/man1/cdbmake.1 + +uninstall: + rm -f $(BINDIR)/cdbget + rm -f $(BINDIR)/cdbdump + rm -f $(BINDIR)/cdbmake + rm -f $(MANDIR)/man5/cdb.5 + rm -f $(MANDIR)/man1/cdbget.1 + rm -f $(MANDIR)/man1/cdbdump.1 + rm -f $(MANDIR)/man1/cdbmake.1 + +.PHONY: all clean install uninstall diff --git a/README.md b/README.md new file mode 100644 index 0000000..a65dcc9 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# cdb + +A reimplementation of D.J. Bernstein's [cdb tools](https://cr.yp.to/cdb.html) +using [skalibs](https://skarnet.org/software/skalibs/). + +# Installation + +1. `make` +2. `make install` + +# Usage + +See the man pages and the original [cdb page](https://cr.yp.to/cdb.html). @@ -0,0 +1,53 @@ +.Dd September 14, 1996 +.Dt CDB 5 +.Os +.Sh NAME +.Nm cdb +.Nd a structure for constant databases +.Sh DESCRIPTION +A +.Nm +is an associative array: it maps strings +.Pq Dq keys +to strings +.Pq Dq data . +.Pp +A cdb contains 256 pointers to linearly probed open hash tables. The +hash tables contain pointers to (key,data) pairs. A cdb is stored in +a single file on disk: +.Bd -literal + +----------------+---------+-------+-------+-----+---------+ + | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 | + +----------------+---------+-------+-------+-----+---------+ +.Ed +.Pp +Each of the 256 initial pointers states a position and a length. The +position is the starting byte position of the hash table. The length +is the number of slots in the hash table. +.Pp +Records are stored sequentially, without special alignment. A record +states a key length, a data length, the key, and the data. +.Pp +Each hash table slot states a hash value and a byte position. If the +byte position is 0, the slot is empty. Otherwise, the slot points to +a record whose key has that hash value. +.Pp +Positions, lengths, and hash values are 32-bit quantities, stored in +little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes. +.Pp +A record is located as follows. Compute the hash value of the key in +the record. The hash value modulo 256 is the number of a hash table. +The hash value divided by 256, modulo the length of that table, is a +slot number. Probe that slot, the next higher slot, and so on, until +you find the record or run into an empty slot. +.Pp +The cdb hash function is +.Ql h = ((h << 5) + h) ^ c , +with a starting hash of 5381. +.Sh SEE ALSO +.Xr cdbget 1 , +.Xr cdbdump 1 , +.Xr cdbmake 1 +.Sh AUTHORS +.An D. J. Bernstein +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac (man page port) diff --git a/cdbdump.1 b/cdbdump.1 new file mode 100644 index 0000000..3534ee2 --- /dev/null +++ b/cdbdump.1 @@ -0,0 +1,39 @@ +.Dd $Mdocdate$ +.Dt CDBDUMP 1 +.Os +.Sh NAME +.Nm cdbdump +.Nd print the contents of a cdb +.Sh SYNOPSIS +.Nm +< +.Ar CDB +.Sh DESCRIPTION +.Nm +prints the contents of a +.Xr cdb 5 +read from stdin in a simple, unambiguous format. +For each record in the cdb, +.Nm +writes a record to stdout of the form: +.Dl +klen,dlen:key->data +followed by a newline. +The keys and data may contain null bytes, newlines, +or any other arbitrary binary data. +.Sh EXIT STATUS +.Bl -tag +.It 0 +Success. +.It 100 +Wrong usage. +.It 111 +System call failed. +.El +.Sh SEE ALSO +.Xr cdb 5 , +.Xr cdbget 1 , +.Xr cdbmake 1 +.Sh AUTHORS +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac +.An D. J. Bernstein (cdb and cdbmake formats, original cdbdump program) +.An Laurent Bercot (cdb read/write library) diff --git a/cdbdump.c b/cdbdump.c new file mode 100644 index 0000000..7e77ef7 --- /dev/null +++ b/cdbdump.c @@ -0,0 +1,42 @@ +#include <skalibs/buffer.h> +#include <skalibs/cdb.h> +#include <skalibs/strerr.h> +#include <skalibs/uint32.h> + +int +main(int argc, char *argv[]) +{ + PROG = "cdbdump"; + if (argc != 1) + strerr_dieusage(100, "cdbdump < CDB"); + + cdb c = CDB_ZERO; + cdb_init_fromfd(&c, 0); + cdb_data key, data; + uint32_t pos = CDB_TRAVERSE_INIT(); + for (;;) { + int r = cdb_traverse_next(&c, &key, &data, &pos); + if (!r) break; + if (r < 0) + strerr_diefusys(111, "read cdb"); + + char uintbuf[sizeof("4294967295")]; + + buffer_put(buffer_1, "+", 1); + uintbuf[uint32_fmt(uintbuf, key.len)] = 0; + buffer_puts(buffer_1, uintbuf); + buffer_put(buffer_1, ",", 1); + uintbuf[uint32_fmt(uintbuf, data.len)] = 0; + buffer_puts(buffer_1, uintbuf); + buffer_put(buffer_1, ":", 1); + + buffer_put(buffer_1, key.s, key.len); + buffer_put(buffer_1, "->", 2); + buffer_put(buffer_1, data.s, data.len); + buffer_put(buffer_1, "\n", 1); + + buffer_flush(buffer_1); + } + cdb_free(&c); + return 0; +} diff --git a/cdbget.1 b/cdbget.1 new file mode 100644 index 0000000..f3e8350 --- /dev/null +++ b/cdbget.1 @@ -0,0 +1,48 @@ +.Dd $Mdocdate$ +.Dt CDBGET 1 +.Os +.Sh NAME +.Nm cdbget +.Nd print the data associated with one key in a cdb +.Sh SYNOPSIS +.Nm +.Ar KEY Op Ar SKIP +< +.Ar CDB +.Sh DESCRIPTION +.Nm +finds the record associated with a given +.Ar KEY +in a +.Xr cdb 5 +and writes the data in the record to stdout. +If a +.Ar SKIP +is given, +.Nm +skips the first +.Ar SKIP +records matching the given +.Ar KEY +and writes the data in the +.Ar SKIP Ns +1th +record to stdout. +.Sh EXIT STATUS +.Bl -tag +.It 0 +Success. +.It 1 +Key not found. +.It 100 +Wrong usage. +.It 111 +System call failed. +.El +.Sh SEE ALSO +.Xr cdb 5 , +.Xr cdbdump 1 , +.Xr cdbmake 1 +.Sh AUTHORS +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac +.An D. J. Bernstein (cdb and cdbmake formats, original cdbget program) +.An Laurent Bercot (cdb read/write library) diff --git a/cdbget.c b/cdbget.c new file mode 100644 index 0000000..b695930 --- /dev/null +++ b/cdbget.c @@ -0,0 +1,36 @@ +#include <string.h> +#include <skalibs/allreadwrite.h> +#include <skalibs/cdb.h> +#include <skalibs/strerr.h> +#include <skalibs/uint32.h> + +int +main(int argc, char *argv[]) +{ + PROG = "cdbget"; + if (argc < 2 || argc > 3) + strerr_dieusage(100, "cdbget KEY [SKIP]"); + + char *key = argv[1]; + uint32_t keylen = strlen(key); + uint32_t skip = 0; + if (argc == 3) { + if (!uint320_scan(argv[2], &skip)) + strerr_dief(111, "invalid integer \"", argv[2], "\""); + } + + cdb c = CDB_ZERO; + cdb_init_fromfd(&c, 0); + + cdb_find_state findstate; + cdb_findstart(&findstate); + cdb_data data; + for (skip++; skip > 0; skip--) { + if (cdb_findnext(&c, &data, key, keylen, &findstate) == 0) + return 1; + } + if (allwrite(1, data.s, data.len) != data.len) + strerr_diefusys(111, "write cdb data to stdout"); + cdb_free(&c); + return 0; +} diff --git a/cdbmake.1 b/cdbmake.1 new file mode 100644 index 0000000..0d3d014 --- /dev/null +++ b/cdbmake.1 @@ -0,0 +1,43 @@ +.Dd $Mdocdate$ +.Dt CDBMAKE 1 +.Os +.Sh NAME +.Nm cdbmake +.Nd create a cdb +.Sh SYNOPSIS +.Nm +.Ar CDB Ar TMP +.Sh DESCRIPTION +.Nm +reads records of the form +.Dl +klen,dlen:key->data\n +from stdin and creates a +.Xr cdb 5 +containing those records at +.Ar TMP . +After reaching EOF on stdin and writing the cdb to +.Ar TMP , +.Nm +renames +.Ar TMP +to +.Ar CDB , +atomically replacing +.Ar CDB . +.Sh EXIT STATUS +.Bl -tag +.It 0 +Success. +.It 100 +Wrong usage. +.It 111 +System call failed. +.El +.Sh SEE ALSO +.Xr cdb 5 , +.Xr cdbget 1 , +.Xr cdbdump 1 +.Sh AUTHORS +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac +.An D. J. Bernstein (cdb and cdbmake formats, original cdbmake program) +.An Laurent Bercot (cdb read/write library) diff --git a/cdbmake.c b/cdbmake.c new file mode 100644 index 0000000..c13420a --- /dev/null +++ b/cdbmake.c @@ -0,0 +1,126 @@ +#include <errno.h> +#include <stdio.h> +#include <skalibs/bytestr.h> +#include <skalibs/buffer.h> +#include <skalibs/cdbmake.h> +#include <skalibs/djbunix.h> +#include <skalibs/posixplz.h> +#include <skalibs/skamisc.h> +#include <skalibs/strerr.h> +#include <skalibs/uint32.h> + +static int +record_read(buffer *b, stralloc *key, stralloc *data, char **errmsg) +{ + char buf[sizeof("+4294967295,4294967295:")-1]; + size_t len = 0; + int r = getlnmax(b, buf, sizeof(buf), &len, ':'); + if (r <= 0) + return r; + if (buf[0] != '+') + return (*errmsg = "expected '+'", errno = EINVAL, -1); + + uint32_t keylen, datalen; + + size_t comma_pos = byte_chr(buf, len, ','); + if (comma_pos == len) + return (*errmsg = "expected ','", errno = EINVAL, -1); + buf[comma_pos] = 0; + if (!uint320_scan(buf + 1, &keylen)) + return (*errmsg = "could not parse key length", errno = EINVAL, -1); + buf[comma_pos] = ','; + + if (len < comma_pos + 1) + return (*errmsg = "expected data length after ','", errno = EINVAL, -1); + size_t colon_pos = byte_chr(buf, len, ':'); + if (colon_pos == len) + return (*errmsg = "expected ':'", errno = EINVAL, -1); + buf[colon_pos] = 0; + if (!uint320_scan(buf + comma_pos + 1, &datalen)) + return (*errmsg = "could not parse data length", errno = EINVAL, -1); + buf[colon_pos] = ':'; + + size_t w = 0; + if (!stralloc_ready(key, keylen)) + return (errno = ENOMEM, -1); + if ((r = buffer_getall(b, key->s, keylen, &w)) <= 0 && errno != EPIPE) + return -1; + if (w < keylen) + return (*errmsg = "unexpected EOF while reading key", errno = EINVAL, -1); + key->len = keylen; + w = 0; + if (buffer_getall(b, buf, 2, &w) <= 0 && errno != EPIPE) + return -1; + if (w < 2 || memcmp(buf, "->", 2)) + return (*errmsg = "expected '->'", errno = EINVAL, -1); + if (!stralloc_ready(data, datalen)) + return (errno = ENOMEM, -1); + w = 0; + if (buffer_getall(b, data->s, datalen, &w) <= 0 && errno != EPIPE) + return -1; + if (w < datalen) + return (*errmsg = "unexpected EOF while reading data", errno = EINVAL, -1); + data->len = datalen; + w = 0; + if (buffer_getall(b, buf, 1, &w) <= 0 && errno != EPIPE) + return -1; + if (w < 1 || buf[0] != '\n') + return (*errmsg = "expected '\\n'", errno = EINVAL, -1); + return 1; +} + +int +main(int argc, char *argv[]) +{ + PROG = "cdbmake"; + if (argc != 3) + strerr_dieusage(100, "cdbmake CDB TMP"); + char *cdb_path = argv[1]; + char *tmp_path = argv[2]; + + int tmp_fd = open_trunc(tmp_path); + if (tmp_fd < 0) + strerr_diefusys(111, "open ", tmp_path, " for writing"); + cdbmaker cm = CDBMAKER_ZERO; + if (!cdbmake_start(&cm, tmp_fd)) { + unlink_void(tmp_path); + strerr_diefusys(111, "cdbmake_start ", tmp_path); + } + + stralloc key = STRALLOC_ZERO; + stralloc data = STRALLOC_ZERO; + for (uint32_t record = 0;; ++record) { + char *errmsg = ""; + int r = record_read(buffer_0, &key, &data, &errmsg); + if (!r) break; + if (r < 0) { + unlink_void(tmp_path); + if (errno == EINVAL) { + char recordbuf[sizeof("4294967295")]; + recordbuf[uint32_fmt(recordbuf, record)] = 0; + strerr_dief(111, "syntax error on record ", recordbuf, ": ", errmsg); + } else { + strerr_diefusys(111, "read from stdin"); + } + } + if (!cdbmake_add(&cm, key.s, key.len, data.s, data.len)) { + unlink_void(tmp_path); + strerr_diefusys(111, "write cdb to ", tmp_path); + } + } + stralloc_free(&key); + stralloc_free(&data); + if (!cdbmake_finish(&cm)) { + unlink_void(tmp_path); + strerr_diefusys(111, "cdbmake_finish ", tmp_path); + } + if (fd_sync(tmp_fd) < 0) { + unlink_void(tmp_path); + strerr_diefusys(111, "fsync ", tmp_path); + } + if (rename(tmp_path, cdb_path) < 0) { + unlink_void(tmp_path); + strerr_diefusys(111, "rename ", tmp_path, " to ", cdb_path); + } + return 0; +} |
