From 2bdd00aa69b901e5230c9b8c24727011626ebeaa Mon Sep 17 00:00:00 2001 From: Sam Nystrom Date: Mon, 15 Jan 2024 13:35:27 -0500 Subject: init --- .gitignore | 5 +++ COPYING | 14 +++++++ Makefile | 45 ++++++++++++++++++++++ README.md | 13 +++++++ cdb.5 | 53 ++++++++++++++++++++++++++ cdbdump.1 | 39 +++++++++++++++++++ cdbdump.c | 42 +++++++++++++++++++++ cdbget.1 | 48 +++++++++++++++++++++++ cdbget.c | 36 ++++++++++++++++++ cdbmake.1 | 43 +++++++++++++++++++++ cdbmake.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 464 insertions(+) create mode 100644 .gitignore create mode 100644 COPYING create mode 100644 Makefile create mode 100644 README.md create mode 100644 cdb.5 create mode 100644 cdbdump.1 create mode 100644 cdbdump.c create mode 100644 cdbget.1 create mode 100644 cdbget.c create mode 100644 cdbmake.1 create mode 100644 cdbmake.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..97a3ce2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +cdbget +cdbdump +cdbmake +tags +core diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..e0c9153 --- /dev/null +++ b/COPYING @@ -0,0 +1,14 @@ +Copyright (c) 2024 Sam Nystrom + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..30472c8 --- /dev/null +++ b/Makefile @@ -0,0 +1,45 @@ +.POSIX: +.SUFFIXES: + +CFLAGS = -Os -Wall -Wextra -pedantic +LDFLAGS = -static +LIBS = -lskarnet + +PREFIX = /usr/local +BINDIR = $(PREFIX)/bin +SHAREDIR = $(PREFIX)/share +MANDIR = $(SHAREDIR)/man + +all: cdbget cdbdump cdbmake + +cdbget: cdbget.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LIBS) + +cdbdump: cdbdump.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LIBS) + +cdbmake: cdbmake.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LIBS) + +clean: + rm -f cdbget cdbdump cdbmake + +install: + install -Dm755 cdbget $(BINDIR)/cdbget + install -Dm755 cdbdump $(BINDIR)/cdbdump + install -Dm755 cdbmake $(BINDIR)/cdbmake + install -Dm644 cdb.5 $(MANDIR)/man5/cdb.5 + install -Dm644 cdbget.1 $(MANDIR)/man1/cdbget.1 + install -Dm644 cdbdump.1 $(MANDIR)/man1/cdbdump.1 + install -Dm644 cdbmake.1 $(MANDIR)/man1/cdbmake.1 + +uninstall: + rm -f $(BINDIR)/cdbget + rm -f $(BINDIR)/cdbdump + rm -f $(BINDIR)/cdbmake + rm -f $(MANDIR)/man5/cdb.5 + rm -f $(MANDIR)/man1/cdbget.1 + rm -f $(MANDIR)/man1/cdbdump.1 + rm -f $(MANDIR)/man1/cdbmake.1 + +.PHONY: all clean install uninstall diff --git a/README.md b/README.md new file mode 100644 index 0000000..a65dcc9 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# cdb + +A reimplementation of D.J. Bernstein's [cdb tools](https://cr.yp.to/cdb.html) +using [skalibs](https://skarnet.org/software/skalibs/). + +# Installation + +1. `make` +2. `make install` + +# Usage + +See the man pages and the original [cdb page](https://cr.yp.to/cdb.html). diff --git a/cdb.5 b/cdb.5 new file mode 100644 index 0000000..1ef87dc --- /dev/null +++ b/cdb.5 @@ -0,0 +1,53 @@ +.Dd September 14, 1996 +.Dt CDB 5 +.Os +.Sh NAME +.Nm cdb +.Nd a structure for constant databases +.Sh DESCRIPTION +A +.Nm +is an associative array: it maps strings +.Pq Dq keys +to strings +.Pq Dq data . +.Pp +A cdb contains 256 pointers to linearly probed open hash tables. 
The +hash tables contain pointers to (key,data) pairs. A cdb is stored in +a single file on disk: +.Bd -literal + +----------------+---------+-------+-------+-----+---------+ + | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 | + +----------------+---------+-------+-------+-----+---------+ +.Ed +.Pp +Each of the 256 initial pointers states a position and a length. The +position is the starting byte position of the hash table. The length +is the number of slots in the hash table. +.Pp +Records are stored sequentially, without special alignment. A record +states a key length, a data length, the key, and the data. +.Pp +Each hash table slot states a hash value and a byte position. If the +byte position is 0, the slot is empty. Otherwise, the slot points to +a record whose key has that hash value. +.Pp +Positions, lengths, and hash values are 32-bit quantities, stored in +little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes. +.Pp +A record is located as follows. Compute the hash value of the key in +the record. The hash value modulo 256 is the number of a hash table. +The hash value divided by 256, modulo the length of that table, is a +slot number. Probe that slot, the next higher slot, and so on, until +you find the record or run into an empty slot. +.Pp +The cdb hash function is +.Ql h = ((h << 5) + h) ^ c , +with a starting hash of 5381. +.Sh SEE ALSO +.Xr cdbget 1 , +.Xr cdbdump 1 , +.Xr cdbmake 1 +.Sh AUTHORS +.An D. J. Bernstein +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac (man page port) diff --git a/cdbdump.1 b/cdbdump.1 new file mode 100644 index 0000000..3534ee2 --- /dev/null +++ b/cdbdump.1 @@ -0,0 +1,39 @@ +.Dd $Mdocdate$ +.Dt CDBDUMP 1 +.Os +.Sh NAME +.Nm cdbdump +.Nd print the contents of a cdb +.Sh SYNOPSIS +.Nm +< +.Ar CDB +.Sh DESCRIPTION +.Nm +prints the contents of a +.Xr cdb 5 +read from stdin in a simple, unambiguous format. 
+For each record in the cdb, +.Nm +writes a record to stdout of the form: +.Dl +klen,dlen:key->data +followed by a newline. +The keys and data may contain null bytes, newlines, +or any other arbitrary binary data. +.Sh EXIT STATUS +.Bl -tag +.It 0 +Success. +.It 100 +Wrong usage. +.It 111 +System call failed. +.El +.Sh SEE ALSO +.Xr cdb 5 , +.Xr cdbget 1 , +.Xr cdbmake 1 +.Sh AUTHORS +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac +.An D. J. Bernstein (cdb and cdbmake formats, original cdbdump program) +.An Laurent Bercot (cdb read/write library) diff --git a/cdbdump.c b/cdbdump.c new file mode 100644 index 0000000..7e77ef7 --- /dev/null +++ b/cdbdump.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include + /* cdbdump: reads a cdb from file descriptor 0 and writes every record to buffer_1 as +klen,dlen:key->data followed by a newline. Exits 100 via strerr_dieusage when any argument is given, and 111 when cdb_traverse_next returns a negative value. NOTE(review): the #include lines above carry no header names in this copy of the patch; presumably they were lost together with their angle brackets - restore them before applying. */ +int +main(int argc, char *argv[]) +{ + PROG = "cdbdump"; + if (argc != 1) + strerr_dieusage(100, "cdbdump < CDB"); + + cdb c = CDB_ZERO; + cdb_init_fromfd(&c, 0); /* NOTE(review): the result is not checked - confirm whether cdb_init_fromfd can fail and needs handling */ + cdb_data key, data; + uint32_t pos = CDB_TRAVERSE_INIT(); + for (;;) { + int r = cdb_traverse_next(&c, &key, &data, &pos); + if (!r) break; /* a result of 0 ends the traversal */ + if (r < 0) + strerr_diefusys(111, "read cdb"); + + char uintbuf[sizeof("4294967295")]; /* room for the longest decimal uint32 plus a terminating NUL */ + + buffer_put(buffer_1, "+", 1); + uintbuf[uint32_fmt(uintbuf, key.len)] = 0; /* the uint32_fmt result is used as the terminator index, so it is presumably the number of characters written - TODO confirm */ + buffer_puts(buffer_1, uintbuf); + buffer_put(buffer_1, ",", 1); + uintbuf[uint32_fmt(uintbuf, data.len)] = 0; + buffer_puts(buffer_1, uintbuf); + buffer_put(buffer_1, ":", 1); + + buffer_put(buffer_1, key.s, key.len); /* key and data are written by explicit length, not as C strings */ + buffer_put(buffer_1, "->", 2); + buffer_put(buffer_1, data.s, data.len); + buffer_put(buffer_1, "\n", 1); + + buffer_flush(buffer_1); /* flushed once per record; the results of the buffer_put, buffer_puts and buffer_flush calls are not checked */ + } + cdb_free(&c); + return 0; +} diff --git a/cdbget.1 b/cdbget.1 new file mode 100644 index 0000000..f3e8350 --- /dev/null +++ b/cdbget.1 @@ -0,0 +1,48 @@ +.Dd $Mdocdate$ +.Dt CDBGET 1 +.Os +.Sh NAME +.Nm cdbget +.Nd print the data associated with one key in a cdb +.Sh SYNOPSIS +.Nm +.Ar KEY Op Ar SKIP +< +.Ar CDB +.Sh DESCRIPTION +.Nm +finds the record associated with a given +.Ar KEY +in a +.Xr cdb 5 +and writes the data in the record to 
stdout. +If a +.Ar SKIP +is given, +.Nm +skips the first +.Ar SKIP +records matching the given +.Ar KEY +and writes the data in the +.Ar SKIP Ns +1th +record to stdout. +.Sh EXIT STATUS +.Bl -tag +.It 0 +Success. +.It 1 +Key not found. +.It 100 +Wrong usage. +.It 111 +System call failed. +.El +.Sh SEE ALSO +.Xr cdb 5 , +.Xr cdbdump 1 , +.Xr cdbmake 1 +.Sh AUTHORS +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac +.An D. J. Bernstein (cdb and cdbmake formats, original cdbget program) +.An Laurent Bercot (cdb read/write library) diff --git a/cdbget.c b/cdbget.c new file mode 100644 index 0000000..b695930 --- /dev/null +++ b/cdbget.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include + /* cdbget KEY [SKIP]: looks KEY up in the cdb on file descriptor 0 and writes the data of the match reached after SKIP earlier matches to file descriptor 1. Returns 1 when cdb_findnext reports 0 before that match is reached, exits 100 on a wrong argument count and 111 on a bad SKIP or a failed write. NOTE(review): the #include lines above carry no header names in this copy of the patch; presumably they were lost together with their angle brackets - restore them before applying. */ +int +main(int argc, char *argv[]) +{ + PROG = "cdbget"; + if (argc < 2 || argc > 3) + strerr_dieusage(100, "cdbget KEY [SKIP]"); + + char *key = argv[1]; + uint32_t keylen = strlen(key); /* KEY comes from argv, so it is NUL-terminated and cannot contain a NUL byte */ + uint32_t skip = 0; + if (argc == 3) { + if (!uint320_scan(argv[2], &skip)) + strerr_dief(111, "invalid integer \"", argv[2], "\""); /* NOTE(review): exits 111, which cdbget.1 documents as a failed system call, while 100 is documented as wrong usage */ + } + + cdb c = CDB_ZERO; + cdb_init_fromfd(&c, 0); /* NOTE(review): the result is not checked - confirm whether cdb_init_fromfd can fail and needs handling */ + + cdb_find_state findstate; + cdb_findstart(&findstate); + cdb_data data; + for (skip++; skip > 0; skip--) { /* runs cdb_findnext SKIP+1 times so that data ends up describing the wanted match. NOTE(review): skip is a uint32_t, so a SKIP of 4294967295 wraps to 0 here, the body never runs and data is used uninitialized below */ + if (cdb_findnext(&c, &data, key, keylen, &findstate) == 0) + return 1; /* NOTE(review): only 0 is treated as not found; confirm whether cdb_findnext can return a negative value on error, which this loop would treat as a match. This path also returns without calling cdb_free */ + } + if (allwrite(1, data.s, data.len) != data.len) + strerr_diefusys(111, "write cdb data to stdout"); + cdb_free(&c); + return 0; +} diff --git a/cdbmake.1 b/cdbmake.1 new file mode 100644 index 0000000..0d3d014 --- /dev/null +++ b/cdbmake.1 @@ -0,0 +1,43 @@ +.Dd $Mdocdate$ +.Dt CDBMAKE 1 +.Os +.Sh NAME +.Nm cdbmake +.Nd create a cdb +.Sh SYNOPSIS +.Nm +.Ar CDB Ar TMP +.Sh DESCRIPTION +.Nm +reads records of the form +.Dl +klen,dlen:key->data\n +from stdin and creates a +.Xr cdb 5 +containing those records at +.Ar TMP . +After reaching EOF on stdin and writing the cdb to +.Ar TMP , +.Nm +renames +.Ar TMP +to +.Ar CDB , +atomically replacing +.Ar CDB . +.Sh EXIT STATUS +.Bl -tag +.It 0 +Success. +.It 100 +Wrong usage. 
+.It 111 +System call failed. +.El +.Sh SEE ALSO +.Xr cdb 5 , +.Xr cdbget 1 , +.Xr cdbdump 1 +.Sh AUTHORS +.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac +.An D. J. Bernstein (cdb and cdbmake formats, original cdbmake program) +.An Laurent Bercot (cdb read/write library) diff --git a/cdbmake.c b/cdbmake.c new file mode 100644 index 0000000..c13420a --- /dev/null +++ b/cdbmake.c @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + /* record_read: reads one record of the form +klen,dlen:key->data followed by a newline from b, storing the key bytes in key and the data bytes in data. Returns 1 on success. Returns the getlnmax result unchanged when it is 0 or negative; the caller treats 0 as end of input. Returns -1 with errno set to EINVAL and *errmsg pointing at a static message on a syntax error, and -1 with errno set to ENOMEM when stralloc_ready fails. NOTE(review): the #include lines above carry no header names in this copy of the patch; presumably they were lost together with their angle brackets - restore them before applying. */ +static int +record_read(buffer *b, stralloc *key, stralloc *data, char **errmsg) +{ + char buf[sizeof("+4294967295,4294967295:")-1]; /* exactly the longest possible record header, with no room for a NUL */ + size_t len = 0; + int r = getlnmax(b, buf, sizeof(buf), &len, ':'); /* reads the header up to the colon; presumably len then counts the bytes stored in buf - TODO confirm */ + if (r <= 0) + return r; + if (buf[0] != '+') + return (*errmsg = "expected '+'", errno = EINVAL, -1); + + uint32_t keylen, datalen; + + size_t comma_pos = byte_chr(buf, len, ','); /* a result equal to len is treated as comma not found */ + if (comma_pos == len) + return (*errmsg = "expected ','", errno = EINVAL, -1); + buf[comma_pos] = 0; /* temporarily terminate the key length digits for uint320_scan; the comma is restored below */ + if (!uint320_scan(buf + 1, &keylen)) + return (*errmsg = "could not parse key length", errno = EINVAL, -1); + buf[comma_pos] = ','; + + if (len < comma_pos + 1) /* NOTE(review): comma_pos differs from len here, so if byte_chr never returns more than len this condition can never be true */ + return (*errmsg = "expected data length after ','", errno = EINVAL, -1); + size_t colon_pos = byte_chr(buf, len, ':'); + if (colon_pos == len) + return (*errmsg = "expected ':'", errno = EINVAL, -1); + buf[colon_pos] = 0; /* same temporary termination, this time for the data length digits */ + if (!uint320_scan(buf + comma_pos + 1, &datalen)) + return (*errmsg = "could not parse data length", errno = EINVAL, -1); + buf[colon_pos] = ':'; + + size_t w = 0; /* byte count filled in by buffer_getall; reset to 0 before each call */ + if (!stralloc_ready(key, keylen)) + return (errno = ENOMEM, -1); + if ((r = buffer_getall(b, key->s, keylen, &w)) <= 0 && errno != EPIPE) /* a result of 0 or less counts as a read error only when errno is not EPIPE; otherwise a short read is caught by the w check below */ + return -1; + if (w < keylen) + return (*errmsg = "unexpected EOF while reading key", errno = EINVAL, -1); + key->len = keylen; + w = 0; + if (buffer_getall(b, buf, 2, &w) <= 0 && errno != EPIPE) /* buf is reused as scratch space for the two-byte arrow separator */ + return -1; + if (w < 2 || memcmp(buf, "->", 2)) + return (*errmsg = "expected '->'", errno = EINVAL, -1); + if 
(!stralloc_ready(data, datalen)) + return (errno = ENOMEM, -1); + w = 0; + if (buffer_getall(b, data->s, datalen, &w) <= 0 && errno != EPIPE) + return -1; + if (w < datalen) + return (*errmsg = "unexpected EOF while reading data", errno = EINVAL, -1); + data->len = datalen; + w = 0; + if (buffer_getall(b, buf, 1, &w) <= 0 && errno != EPIPE) /* every record must end with a single newline byte */ + return -1; + if (w < 1 || buf[0] != '\n') + return (*errmsg = "expected '\\n'", errno = EINVAL, -1); + return 1; +} + /* cdbmake CDB TMP: builds a cdb in the file TMP from the records read on buffer_0, then renames TMP to CDB. Exits 100 on a wrong argument count. Every failure after TMP has been opened unlinks TMP and exits 111. */ +int +main(int argc, char *argv[]) +{ + PROG = "cdbmake"; + if (argc != 3) + strerr_dieusage(100, "cdbmake CDB TMP"); + char *cdb_path = argv[1]; + char *tmp_path = argv[2]; + + int tmp_fd = open_trunc(tmp_path); /* NOTE(review): tmp_fd is never closed explicitly before main returns */ + if (tmp_fd < 0) + strerr_diefusys(111, "open ", tmp_path, " for writing"); + cdbmaker cm = CDBMAKER_ZERO; + if (!cdbmake_start(&cm, tmp_fd)) { + unlink_void(tmp_path); + strerr_diefusys(111, "cdbmake_start ", tmp_path); + } + + stralloc key = STRALLOC_ZERO; + stralloc data = STRALLOC_ZERO; + for (uint32_t record = 0;; ++record) { /* record is the zero-based index of the record being read, used only in the syntax error message */ + char *errmsg = ""; + int r = record_read(buffer_0, &key, &data, &errmsg); + if (!r) break; /* 0 from record_read means no further record */ + if (r < 0) { + unlink_void(tmp_path); /* NOTE(review): errno is inspected right after this call, so this relies on unlink_void leaving errno untouched - TODO confirm */ + if (errno == EINVAL) { /* record_read marks syntax errors with EINVAL; any other errno is reported as a read failure */ + char recordbuf[sizeof("4294967295")]; + recordbuf[uint32_fmt(recordbuf, record)] = 0; + strerr_dief(111, "syntax error on record ", recordbuf, ": ", errmsg); + } else { + strerr_diefusys(111, "read from stdin"); + } + } + if (!cdbmake_add(&cm, key.s, key.len, data.s, data.len)) { + unlink_void(tmp_path); + strerr_diefusys(111, "write cdb to ", tmp_path); + } + } + stralloc_free(&key); + stralloc_free(&data); + if (!cdbmake_finish(&cm)) { + unlink_void(tmp_path); + strerr_diefusys(111, "cdbmake_finish ", tmp_path); + } + if (fd_sync(tmp_fd) < 0) { /* sync TMP before it is renamed over CDB */ + unlink_void(tmp_path); + strerr_diefusys(111, "fsync ", tmp_path); + } + if (rename(tmp_path, cdb_path) < 0) { + unlink_void(tmp_path); + strerr_diefusys(111, "rename ", tmp_path, " to ", cdb_path); + } + return 0; +} -- cgit v1.2.3