summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.gitignore5
-rw-r--r--COPYING14
-rw-r--r--Makefile45
-rw-r--r--README.md13
-rw-r--r--cdb.553
-rw-r--r--cdbdump.139
-rw-r--r--cdbdump.c42
-rw-r--r--cdbget.148
-rw-r--r--cdbget.c36
-rw-r--r--cdbmake.143
-rw-r--r--cdbmake.c126
11 files changed, 464 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..97a3ce2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+cdbget
+cdbdump
+cdbmake
+tags
+core
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..e0c9153
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,14 @@
+Copyright (c) 2024 Sam Nystrom <sam@samnystrom.dev>
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..30472c8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+.POSIX:
+.SUFFIXES:
+
+# Makefile for the cdb tools: cdbget, cdbdump and cdbmake.
+#
+# Every macro below can be overridden on the command line, for example
+# `make CC=clang LDFLAGS=` or `make PREFIX=/usr DESTDIR=/tmp/pkg install`.
+
+# Compiler and linker settings. LIBS is passed after the source file.
+CFLAGS = -Os -Wall -Wextra -pedantic
+LDFLAGS = -static
+LIBS = -lskarnet
+
+# Installation layout. DESTDIR is deliberately not assigned here: it expands
+# to nothing unless the caller supplies it, in which case it is prepended to
+# every installed path so the tree can be staged for packaging.
+PREFIX = /usr/local
+BINDIR = $(PREFIX)/bin
+SHAREDIR = $(PREFIX)/share
+MANDIR = $(SHAREDIR)/man
+
+all: cdbget cdbdump cdbmake
+
+# Each tool is built from the single source file that shares its name. The
+# source is spelled $@.c because POSIX only defines $< for inference rules.
+cdbget: cdbget.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $@.c -o $@ $(LIBS)
+
+cdbdump: cdbdump.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $@.c -o $@ $(LIBS)
+
+cdbmake: cdbmake.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $@.c -o $@ $(LIBS)
+
+clean:
+	rm -f cdbget cdbdump cdbmake
+
+# Target directories are created with mkdir -p rather than `install -D`,
+# whose meaning differs between install(1) implementations.
+install:
+	mkdir -p $(DESTDIR)$(BINDIR)
+	mkdir -p $(DESTDIR)$(MANDIR)/man1 $(DESTDIR)$(MANDIR)/man5
+	install -m 755 cdbget $(DESTDIR)$(BINDIR)/cdbget
+	install -m 755 cdbdump $(DESTDIR)$(BINDIR)/cdbdump
+	install -m 755 cdbmake $(DESTDIR)$(BINDIR)/cdbmake
+	install -m 644 cdb.5 $(DESTDIR)$(MANDIR)/man5/cdb.5
+	install -m 644 cdbget.1 $(DESTDIR)$(MANDIR)/man1/cdbget.1
+	install -m 644 cdbdump.1 $(DESTDIR)$(MANDIR)/man1/cdbdump.1
+	install -m 644 cdbmake.1 $(DESTDIR)$(MANDIR)/man1/cdbmake.1
+
+# Removes exactly the files that the install target creates.
+uninstall:
+	rm -f $(DESTDIR)$(BINDIR)/cdbget
+	rm -f $(DESTDIR)$(BINDIR)/cdbdump
+	rm -f $(DESTDIR)$(BINDIR)/cdbmake
+	rm -f $(DESTDIR)$(MANDIR)/man5/cdb.5
+	rm -f $(DESTDIR)$(MANDIR)/man1/cdbget.1
+	rm -f $(DESTDIR)$(MANDIR)/man1/cdbdump.1
+	rm -f $(DESTDIR)$(MANDIR)/man1/cdbmake.1
+
+.PHONY: all clean install uninstall
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a65dcc9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# cdb
+
+A reimplementation of D.J. Bernstein's [cdb tools](https://cr.yp.to/cdb.html)
+using [skalibs](https://skarnet.org/software/skalibs/).
+
+# Installation
+
+1. `make`
+2. `make install`
+
+# Usage
+
+See the man pages and the original [cdb page](https://cr.yp.to/cdb.html).
diff --git a/cdb.5 b/cdb.5
new file mode 100644
index 0000000..1ef87dc
--- /dev/null
+++ b/cdb.5
@@ -0,0 +1,53 @@
+.Dd September 14, 1996
+.Dt CDB 5
+.Os
+.Sh NAME
+.Nm cdb
+.Nd a structure for constant databases
+.Sh DESCRIPTION
+A
+.Nm
+is an associative array: it maps strings
+.Pq Dq keys
+to strings
+.Pq Dq data .
+.Pp
+A cdb contains 256 pointers to linearly probed open hash tables. The
+hash tables contain pointers to (key,data) pairs. A cdb is stored in
+a single file on disk:
+.Bd -literal
+ +----------------+---------+-------+-------+-----+---------+
+ | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 |
+ +----------------+---------+-------+-------+-----+---------+
+.Ed
+.Pp
+Each of the 256 initial pointers states a position and a length. The
+position is the starting byte position of the hash table. The length
+is the number of slots in the hash table.
+.Pp
+Records are stored sequentially, without special alignment. A record
+states a key length, a data length, the key, and the data.
+.Pp
+Each hash table slot states a hash value and a byte position. If the
+byte position is 0, the slot is empty. Otherwise, the slot points to
+a record whose key has that hash value.
+.Pp
+Positions, lengths, and hash values are 32-bit quantities, stored in
+little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes.
+.Pp
+A record is located as follows. Compute the hash value of the key in
+the record. The hash value modulo 256 is the number of a hash table.
+The hash value divided by 256, modulo the length of that table, is a
+slot number. Probe that slot, the next higher slot, and so on, until
+you find the record or run into an empty slot.
+.Pp
+The cdb hash function is
+.Ql h = ((h << 5) + h) ^ c ,
+with a starting hash of 5381.
+.Sh SEE ALSO
+.Xr cdbget 1 ,
+.Xr cdbdump 1 ,
+.Xr cdbmake 1
+.Sh AUTHORS
+.An D. J. Bernstein
+.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac (man page port)
diff --git a/cdbdump.1 b/cdbdump.1
new file mode 100644
index 0000000..3534ee2
--- /dev/null
+++ b/cdbdump.1
@@ -0,0 +1,39 @@
+.Dd $Mdocdate$
+.Dt CDBDUMP 1
+.Os
+.Sh NAME
+.Nm cdbdump
+.Nd print the contents of a cdb
+.Sh SYNOPSIS
+.Nm
+<
+.Ar CDB
+.Sh DESCRIPTION
+.Nm
+prints the contents of a
+.Xr cdb 5
+read from stdin in a simple, unambiguous format.
+For each record in the cdb,
+.Nm
+writes a record to stdout of the form:
+.Dl +klen,dlen:key->data
+followed by a newline.
+The keys and data may contain null bytes, newlines,
+or any other arbitrary binary data.
+.Sh EXIT STATUS
+.Bl -tag
+.It 0
+Success.
+.It 100
+Wrong usage.
+.It 111
+System call failed.
+.El
+.Sh SEE ALSO
+.Xr cdb 5 ,
+.Xr cdbget 1 ,
+.Xr cdbmake 1
+.Sh AUTHORS
+.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac
+.An D. J. Bernstein (cdb and cdbmake formats, original cdbdump program)
+.An Laurent Bercot (cdb read/write library)
diff --git a/cdbdump.c b/cdbdump.c
new file mode 100644
index 0000000..7e77ef7
--- /dev/null
+++ b/cdbdump.c
@@ -0,0 +1,42 @@
+#include <skalibs/buffer.h>
+#include <skalibs/cdb.h>
+#include <skalibs/strerr.h>
+#include <skalibs/uint32.h>
+
+/*
+ * Append len bytes starting at s to the stdout buffer.
+ * Exits with status 111 if the bytes cannot be buffered or written out.
+ */
+static void
+put(char const *s, size_t len)
+{
+	if (buffer_put(buffer_1, s, len) < 0)
+		strerr_diefusys(111, "write to stdout");
+}
+
+/*
+ * Append n, formatted in decimal, to the stdout buffer.
+ */
+static void
+put_uint32(uint32_t n)
+{
+	char fmt[sizeof("4294967295")];
+	put(fmt, uint32_fmt(fmt, n));
+}
+
+/*
+ * cdbdump: read a cdb from stdin and print every record to stdout in the
+ * form "+klen,dlen:key->data" followed by a newline.
+ *
+ * Takes no arguments. Exit status: 0 on success, 100 on wrong usage, 111
+ * when the cdb cannot be opened or traversed or stdout cannot be written.
+ */
+int
+main(int argc, char *argv[])
+{
+	(void)argv;
+	PROG = "cdbdump";
+	if (argc != 1)
+		strerr_dieusage(100, "cdbdump < CDB");
+
+	cdb c = CDB_ZERO;
+	/* Without this check a failed open would go on to traverse an empty cdb. */
+	if (!cdb_init_fromfd(&c, 0))
+		strerr_diefusys(111, "open cdb from stdin");
+
+	cdb_data key, data;
+	uint32_t pos = CDB_TRAVERSE_INIT();
+	for (;;) {
+		int r = cdb_traverse_next(&c, &key, &data, &pos);
+		if (!r) break;
+		if (r < 0)
+			strerr_diefusys(111, "read cdb");
+
+		/* Header: "+klen,dlen:" */
+		put("+", 1);
+		put_uint32(key.len);
+		put(",", 1);
+		put_uint32(data.len);
+		put(":", 1);
+
+		/* Payload: key and data are written raw, using their lengths. */
+		put(key.s, key.len);
+		put("->", 2);
+		put(data.s, data.len);
+		put("\n", 1);
+	}
+	/*
+	 * Flush once at the end instead of after every record, and report a
+	 * failed flush rather than exiting 0 with truncated output.
+	 */
+	if (!buffer_flush(buffer_1))
+		strerr_diefusys(111, "write to stdout");
+	cdb_free(&c);
+	return 0;
+}
diff --git a/cdbget.1 b/cdbget.1
new file mode 100644
index 0000000..f3e8350
--- /dev/null
+++ b/cdbget.1
@@ -0,0 +1,48 @@
+.Dd $Mdocdate$
+.Dt CDBGET 1
+.Os
+.Sh NAME
+.Nm cdbget
+.Nd print the data associated with one key in a cdb
+.Sh SYNOPSIS
+.Nm
+.Ar KEY Op Ar SKIP
+<
+.Ar CDB
+.Sh DESCRIPTION
+.Nm
+finds the record associated with a given
+.Ar KEY
+in a
+.Xr cdb 5
+and writes the data in the record to stdout.
+If a
+.Ar SKIP
+is given,
+.Nm
+skips the first
+.Ar SKIP
+records matching the given
+.Ar KEY
+and writes the data in the
+.Ar SKIP Ns +1th
+record to stdout.
+.Sh EXIT STATUS
+.Bl -tag
+.It 0
+Success.
+.It 1
+Key not found.
+.It 100
+Wrong usage.
+.It 111
+System call failed.
+.El
+.Sh SEE ALSO
+.Xr cdb 5 ,
+.Xr cdbdump 1 ,
+.Xr cdbmake 1
+.Sh AUTHORS
+.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac
+.An D. J. Bernstein (cdb and cdbmake formats, original cdbget program)
+.An Laurent Bercot (cdb read/write library)
diff --git a/cdbget.c b/cdbget.c
new file mode 100644
index 0000000..b695930
--- /dev/null
+++ b/cdbget.c
@@ -0,0 +1,36 @@
+#include <string.h>
+#include <skalibs/allreadwrite.h>
+#include <skalibs/cdb.h>
+#include <skalibs/strerr.h>
+#include <skalibs/uint32.h>
+
+/*
+ * cdbget: look up KEY in the cdb on stdin and write the data of the matching
+ * record to stdout. With a SKIP argument, the first SKIP records matching
+ * KEY are passed over and the data of the next match is written.
+ *
+ * Exit status: 0 on success, 1 if there is no such record, 100 on wrong
+ * usage (including a SKIP that is not an unsigned integer), 111 when the
+ * cdb cannot be opened or searched or stdout cannot be written.
+ */
+int
+main(int argc, char *argv[])
+{
+	PROG = "cdbget";
+	if (argc < 2 || argc > 3)
+		strerr_dieusage(100, "cdbget KEY [SKIP]");
+
+	char *key = argv[1];
+	uint32_t keylen = strlen(key);
+	uint32_t skip = 0;
+	/* A malformed SKIP is a usage error, so it exits 100 rather than 111. */
+	if (argc == 3 && !uint320_scan(argv[2], &skip))
+		strerr_dief(100, "invalid integer \"", argv[2], "\"");
+
+	cdb c = CDB_ZERO;
+	if (!cdb_init_fromfd(&c, 0))
+		strerr_diefusys(111, "open cdb from stdin");
+
+	cdb_find_state findstate;
+	cdb_findstart(&findstate);
+	cdb_data data;
+	/*
+	 * Fetch skip + 1 matches without ever computing skip + 1: for the
+	 * largest SKIP that sum wraps to 0, which would skip the lookup and
+	 * write an uninitialized data.
+	 */
+	for (;;) {
+		int r = cdb_findnext(&c, &data, key, keylen, &findstate);
+		/* A negative result is a failed lookup, not a match. */
+		if (r < 0)
+			strerr_dief(111, "invalid cdb");
+		if (!r) {
+			cdb_free(&c);
+			return 1;
+		}
+		if (!skip) break;
+		skip--;
+	}
+	if (allwrite(1, data.s, data.len) != data.len)
+		strerr_diefusys(111, "write cdb data to stdout");
+	cdb_free(&c);
+	return 0;
+}
diff --git a/cdbmake.1 b/cdbmake.1
new file mode 100644
index 0000000..0d3d014
--- /dev/null
+++ b/cdbmake.1
@@ -0,0 +1,43 @@
+.Dd $Mdocdate$
+.Dt CDBMAKE 1
+.Os
+.Sh NAME
+.Nm cdbmake
+.Nd create a cdb
+.Sh SYNOPSIS
+.Nm
+.Ar CDB Ar TMP
+.Sh DESCRIPTION
+.Nm
+reads records of the form
+.Dl +klen,dlen:key->data\en
+from stdin and creates a
+.Xr cdb 5
+containing those records at
+.Ar TMP .
+After reaching EOF on stdin and writing the cdb to
+.Ar TMP ,
+.Nm
+renames
+.Ar TMP
+to
+.Ar CDB ,
+atomically replacing
+.Ar CDB .
+.Sh EXIT STATUS
+.Bl -tag
+.It 0
+Success.
+.It 100
+Wrong usage.
+.It 111
+System call failed, or the input is malformed.
+.El
+.Sh SEE ALSO
+.Xr cdb 5 ,
+.Xr cdbget 1 ,
+.Xr cdbdump 1
+.Sh AUTHORS
+.An Sam Nystrom Ao Mt sam@samnystrom.dev Ac
+.An D. J. Bernstein (cdb and cdbmake formats, original cdbmake program)
+.An Laurent Bercot (cdb read/write library)
diff --git a/cdbmake.c b/cdbmake.c
new file mode 100644
index 0000000..c13420a
--- /dev/null
+++ b/cdbmake.c
@@ -0,0 +1,126 @@
+#include <errno.h>
+#include <stdio.h>
+#include <skalibs/bytestr.h>
+#include <skalibs/buffer.h>
+#include <skalibs/cdbmake.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/posixplz.h>
+#include <skalibs/skamisc.h>
+#include <skalibs/strerr.h>
+#include <skalibs/uint32.h>
+
+/*
+ * Read one record of the form "+klen,dlen:key->data\n" from b.
+ *
+ * On success, key and data hold the record's key and data with their len
+ * fields set, and 1 is returned. 0 is returned when the input ends before a
+ * record starts, optionally after the single blank line that terminates
+ * input in the original cdbmake format. -1 is returned on failure: for
+ * malformed input errno is EINVAL and *errmsg points to a static
+ * description; for any other failure *errmsg is left untouched.
+ */
+static int
+record_read(buffer *b, stralloc *key, stralloc *data, char **errmsg)
+{
+	/* Sized for the longest valid header, without a terminating NUL. */
+	char buf[sizeof("+4294967295,4294967295:")-1];
+	size_t len = 0;
+	int r = getlnmax(b, buf, sizeof(buf), &len, ':');
+	if (r < 0)
+		return r;
+	if (!r) {
+		/*
+		 * No ':' was found before the input ended. Treat that as a
+		 * clean end only if nothing, or just the terminating blank
+		 * line, was pending; otherwise the last header was cut short.
+		 * NOTE(review): assumes getlnmax leaves the number of bytes
+		 * it consumed in len when it returns 0 -- confirm against
+		 * skalibs. If len stays 0 this behaves as a plain EOF.
+		 */
+		if (len == 0 || (len == 1 && buf[0] == '\n'))
+			return 0;
+		return (*errmsg = "unexpected EOF while reading record header", errno = EINVAL, -1);
+	}
+	if (buf[0] != '+')
+		return (*errmsg = "expected '+'", errno = EINVAL, -1);
+
+	uint32_t keylen, datalen;
+
+	/*
+	 * Each number is NUL-terminated in place for the duration of its scan,
+	 * then the separator is restored.
+	 */
+	size_t comma_pos = byte_chr(buf, len, ',');
+	if (comma_pos == len)
+		return (*errmsg = "expected ','", errno = EINVAL, -1);
+	buf[comma_pos] = 0;
+	if (!uint320_scan(buf + 1, &keylen))
+		return (*errmsg = "could not parse key length", errno = EINVAL, -1);
+	buf[comma_pos] = ',';
+
+	size_t colon_pos = byte_chr(buf, len, ':');
+	if (colon_pos == len)
+		return (*errmsg = "expected ':'", errno = EINVAL, -1);
+	buf[colon_pos] = 0;
+	if (!uint320_scan(buf + comma_pos + 1, &datalen))
+		return (*errmsg = "could not parse data length", errno = EINVAL, -1);
+	buf[colon_pos] = ':';
+
+	/*
+	 * For every read below, a failure with errno == EPIPE is not returned
+	 * directly; it falls through to the length check so that a short read
+	 * is reported as a syntax error (presumably EPIPE is how skalibs
+	 * signals EOF here -- TODO confirm).
+	 */
+	size_t w = 0;
+	if (!stralloc_ready(key, keylen))
+		return (errno = ENOMEM, -1);
+	if (buffer_getall(b, key->s, keylen, &w) <= 0 && errno != EPIPE)
+		return -1;
+	if (w < keylen)
+		return (*errmsg = "unexpected EOF while reading key", errno = EINVAL, -1);
+	key->len = keylen;
+
+	/* buf is reused as scratch space for the "->" and "\n" separators. */
+	w = 0;
+	if (buffer_getall(b, buf, 2, &w) <= 0 && errno != EPIPE)
+		return -1;
+	if (w < 2 || memcmp(buf, "->", 2))
+		return (*errmsg = "expected '->'", errno = EINVAL, -1);
+
+	if (!stralloc_ready(data, datalen))
+		return (errno = ENOMEM, -1);
+	w = 0;
+	if (buffer_getall(b, data->s, datalen, &w) <= 0 && errno != EPIPE)
+		return -1;
+	if (w < datalen)
+		return (*errmsg = "unexpected EOF while reading data", errno = EINVAL, -1);
+	data->len = datalen;
+
+	w = 0;
+	if (buffer_getall(b, buf, 1, &w) <= 0 && errno != EPIPE)
+		return -1;
+	if (w < 1 || buf[0] != '\n')
+		return (*errmsg = "expected '\\n'", errno = EINVAL, -1);
+	return 1;
+}
+
+/*
+ * cdbmake: read records from stdin, write them as a cdb to TMP, then rename
+ * TMP to CDB.
+ *
+ * TMP is unlinked on every failure path. Exit status: 0 on success, 100 on
+ * wrong usage, 111 on malformed input or when any step of writing, syncing
+ * or renaming the file fails.
+ */
+int
+main(int argc, char *argv[])
+{
+	PROG = "cdbmake";
+	if (argc != 3)
+		strerr_dieusage(100, "cdbmake CDB TMP");
+	char *cdb_path = argv[1];
+	char *tmp_path = argv[2];
+
+	int tmp_fd = open_trunc(tmp_path);
+	if (tmp_fd < 0)
+		strerr_diefusys(111, "open ", tmp_path, " for writing");
+	cdbmaker cm = CDBMAKER_ZERO;
+	if (!cdbmake_start(&cm, tmp_fd)) {
+		unlink_void(tmp_path);
+		strerr_diefusys(111, "cdbmake_start ", tmp_path);
+	}
+
+	stralloc key = STRALLOC_ZERO;
+	stralloc data = STRALLOC_ZERO;
+	/* record counts from 0 and is only used in the syntax error message. */
+	for (uint32_t record = 0;; ++record) {
+		char *errmsg = "";
+		int r = record_read(buffer_0, &key, &data, &errmsg);
+		if (!r) break;
+		if (r < 0) {
+			unlink_void(tmp_path);
+			/*
+			 * record_read fills in errmsg only for malformed
+			 * input, so test that instead of errno: a system
+			 * call failing with EINVAL must not be reported as a
+			 * syntax error with an empty description.
+			 */
+			if (errmsg[0]) {
+				char recordbuf[sizeof("4294967295")];
+				recordbuf[uint32_fmt(recordbuf, record)] = 0;
+				strerr_dief(111, "syntax error on record ", recordbuf, ": ", errmsg);
+			} else {
+				strerr_diefusys(111, "read from stdin");
+			}
+		}
+		if (!cdbmake_add(&cm, key.s, key.len, data.s, data.len)) {
+			unlink_void(tmp_path);
+			strerr_diefusys(111, "write cdb to ", tmp_path);
+		}
+	}
+	stralloc_free(&key);
+	stralloc_free(&data);
+	if (!cdbmake_finish(&cm)) {
+		unlink_void(tmp_path);
+		strerr_diefusys(111, "cdbmake_finish ", tmp_path);
+	}
+	/* Sync before the rename so CDB is not replaced by an unsynced file. */
+	if (fd_sync(tmp_fd) < 0) {
+		unlink_void(tmp_path);
+		strerr_diefusys(111, "fsync ", tmp_path);
+	}
+	if (rename(tmp_path, cdb_path) < 0) {
+		unlink_void(tmp_path);
+		strerr_diefusys(111, "rename ", tmp_path, " to ", cdb_path);
+	}
+	return 0;
+}