summaryrefslogtreecommitdiffstats
path: root/rax.h
diff options
context:
space:
mode:
authorRemi Collet <remi@remirepo.net>2018-06-14 06:54:34 +0200
committerRemi Collet <remi@remirepo.net>2018-06-14 06:54:34 +0200
commit661d837b26fc7cab302ee2b6dfb54b7681558bf1 (patch)
tree8c40ef0527d5b99f273f74f55e409de67be5668c /rax.h
parentf801a126bd0cad875bde12663f677b6db2cc0e99 (diff)
Redis 5.0 RC2 (4.9.102) - Released Wed Jun 13 12:49:13 CEST 2018
Upgrade urgency CRITICAL: This release fixes important security issues. HIGH: This release fixes a SCAN commands family bug. MODERATE: This release fixes a PSYNC2 edge case with expires. MODERATE: Sentinel related fixes. LOW: All the other issues
Diffstat (limited to 'rax.h')
-rw-r--r--rax.h164
1 files changed, 164 insertions, 0 deletions
diff --git a/rax.h b/rax.h
new file mode 100644
index 0000000..5b4d451
--- /dev/null
+++ b/rax.h
@@ -0,0 +1,164 @@
+#ifndef RAX_H
+#define RAX_H
+
+#include <stdint.h>
+
+/* Representation of a radix tree as implemented in this file, that contains
+ * the strings "foo", "foobar" and "footer" after the insertion of each
+ * word. When the node represents a key inside the radix tree, we write it
+ * between [], otherwise it is written between ().
+ *
+ * This is the vanilla representation:
+ *
+ * (f) ""
+ * \
+ * (o) "f"
+ * \
+ * (o) "fo"
+ * \
+ * [t b] "foo"
+ * / \
+ * "foot" (e) (a) "foob"
+ * / \
+ * "foote" (r) (r) "fooba"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However, this implementation implements a very common optimization where
+ * successive nodes having a single child are "compressed" into the node
+ * itself as a string of characters, each representing a next-level child,
+ * and only the link to the node representing the last character node is
+ * provided inside the representation. So the above representation is turend
+ * into:
+ *
+ * ["foo"] ""
+ * |
+ * [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However this optimization makes the implementation a bit more complex.
+ * For instance if a key "first" is added in the above radix tree, a
+ * "node splitting" operation is needed, since the "foo" prefix is no longer
+ * composed of nodes having a single child one after the other. This is the
+ * above tree and the resulting node splitting after this event happens:
+ *
+ *
+ * (f) ""
+ * /
+ * (i o) "f"
+ * / \
+ * "firs" ("rst") (o) "fo"
+ * / \
+ * "first" [] [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * Similarly after deletion, if a new chain of nodes having a single child
+ * is created (the chain must also not include nodes that represent keys),
+ * it must be compressed back into a single node.
+ *
+ */
+
+#define RAX_NODE_MAX_SIZE ((1<<29)-1)
+typedef struct raxNode {
+ uint32_t iskey:1; /* Does this node contain a key? */
+ uint32_t isnull:1; /* Associated value is NULL (don't store it). */
+ uint32_t iscompr:1; /* Node is compressed. */
+ uint32_t size:29; /* Number of children, or compressed string len. */
+ /* Data layout is as follows:
+ *
+ * If node is not compressed we have 'size' bytes, one for each children
+ * character, and 'size' raxNode pointers, point to each child node.
+ * Note how the character is not stored in the children but in the
+ * edge of the parents:
+ *
+ * [header strlen=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
+ *
+ * if node is compressed (strlen != 0) the node has 1 children.
+ * In that case the 'size' bytes of the string stored immediately at
+ * the start of the data section, represent a sequence of successive
+ * nodes linked one after the other, for which only the last one in
+ * the sequence is actually represented as a node, and pointed to by
+ * the current compressed node.
+ *
+ * [header strlen=3][xyz][z-ptr](value-ptr?)
+ *
+ * Both compressed and not compressed nodes can represent a key
+ * with associated data in the radix tree at any level (not just terminal
+ * nodes).
+ *
+ * If the node has an associated key (iskey=1) and is not NULL
+ * (isnull=0), then after the raxNode pointers poiting to the
+ * childen, an additional value pointer is present (as you can see
+ * in the representation above as "value-ptr" field).
+ */
+ unsigned char data[];
+} raxNode;
+
+typedef struct rax {
+ raxNode *head;
+ uint64_t numele;
+ uint64_t numnodes;
+} rax;
+
+/* Stack data structure used by raxLowWalk() in order to, optionally, return
+ * a list of parent nodes to the caller. The nodes do not have a "parent"
+ * field for space concerns, so we use the auxiliary stack when needed. */
+#define RAX_STACK_STATIC_ITEMS 32
+typedef struct raxStack {
+ void **stack; /* Points to static_items or an heap allocated array. */
+ size_t items, maxitems; /* Number of items contained and total space. */
+ /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap
+ * and use this static array of pointers instead. */
+ void *static_items[RAX_STACK_STATIC_ITEMS];
+ int oom; /* True if pushing into this stack failed for OOM at some point. */
+} raxStack;
+
+/* Radix tree iterator state is encapsulated into this data structure. */
+#define RAX_ITER_STATIC_LEN 128
+#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
+ element for the first iteration and
+ clear the flag. */
+#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */
+#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while
+ iterating. But it is slower. */
+typedef struct raxIterator {
+ int flags;
+ rax *rt; /* Radix tree we are iterating. */
+ unsigned char *key; /* The current string. */
+ void *data; /* Data associated to this key. */
+ size_t key_len; /* Current key length. */
+ size_t key_max; /* Max key len the current key buffer can hold. */
+ unsigned char key_static_string[RAX_ITER_STATIC_LEN];
+ raxNode *node; /* Current node. Only for unsafe iteration. */
+ raxStack stack; /* Stack used for unsafe iteration. */
+} raxIterator;
+
+/* A special pointer returned for not found items. */
+extern void *raxNotFound;
+
+/* Exported API. */
+rax *raxNew(void);
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
+void *raxFind(rax *rax, unsigned char *s, size_t len);
+void raxFree(rax *rax);
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
+void raxStart(raxIterator *it, rax *rt);
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
+int raxNext(raxIterator *it);
+int raxPrev(raxIterator *it);
+int raxRandomWalk(raxIterator *it, size_t steps);
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
+void raxStop(raxIterator *it);
+int raxEOF(raxIterator *it);
+void raxShow(rax *rax);
+uint64_t raxSize(rax *rax);
+
+#endif