/*
* libkmod - interface to kernel module operations
*
* Copyright (C) 2011-2013 ProFUSION embedded systems
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <fnmatch.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <shared/macro.h>
#include <shared/strbuf.h>
#include <shared/util.h>
#include "libkmod-internal.h"
#include "libkmod-index.h"
/* libkmod-index.c: module index file implementation
*
* Integers are stored as 32 bit unsigned in "network" order, i.e. MSB first.
* All files start with a magic number.
*
* Magic spells "BOOTFAST". Second one used on newer versioned binary files.
* #define INDEX_MAGIC_OLD 0xB007FA57
*
* We use a version string to keep track of changes to the binary format
* This is stored in the form: INDEX_MAJOR (hi) INDEX_MINOR (lo) just in
* case we ever decide to have minor changes that are not incompatible.
*/
#define INDEX_MAGIC 0xB007F457
#define INDEX_VERSION_MAJOR 0x0002
#define INDEX_VERSION_MINOR 0x0001
#define INDEX_VERSION ((INDEX_VERSION_MAJOR<<16)|INDEX_VERSION_MINOR)
/* The index file maps keys to values. Both keys and values are ASCII strings.
* Each key can have multiple values. Values are sorted by an integer priority.
*
* The reader also implements a wildcard search (including range expressions)
* where the keys in the index are treated as patterns.
* This feature is required for module aliases.
*/
#define INDEX_CHILDMAX 128
/* Disk format:
*
* uint32_t magic = INDEX_MAGIC;
* uint32_t version = INDEX_VERSION;
* uint32_t root_offset;
*
* (node_offset & INDEX_NODE_MASK) specifies the file offset of nodes:
*
* char[] prefix; // nul terminated
*
* char first;
* char last;
* uint32_t children[last - first + 1];
*
* uint32_t value_count;
* struct {
* uint32_t priority;
* char[] value; // nul terminated
* } values[value_count];
*
* (node_offset & INDEX_NODE_FLAGS) indicates which fields are present.
* Empty prefixes are omitted, leaf nodes omit the three child-related fields.
*
* This could be optimised further by adding a sparse child format
* (indicated using a new flag).
*
*
* Implementation is based on a radix tree, or "trie".
* Each arc from parent to child is labelled with a character.
* Each path from the root represents a string.
*
* == Example strings ==
*
* ask
* ate
* on
* once
* one
*
* == Key ==
* + Normal node
* * Marked node, representing a key and it's values.
*
* +
* |-a-+-s-+-k-*
* | |
* | `-t-+-e-*
* |
* `-o-+-n-*-c-+-e-*
* |
* `-e-*
*
* Naive implementations tend to be very space inefficient; child pointers
* are stored in arrays indexed by character, but most child pointers are null.
*
* Our implementation uses a scheme described by Wikipedia as a Patrica trie,
*
* "easiest to understand as a space-optimized trie where
* each node with only one child is merged with its child"
*
* +
* |-a-+-sk-*
* | |
* | `-te-*
* |
* `-on-*-ce-*
* |
* `-e-*
*
* We still use arrays of child pointers indexed by a single character;
* the remaining characters of the label are stored as a "prefix" in the child.
*
* The paper describing the original Patrica trie works on individiual bits -
* each node has a maximum of two children, which increases space efficiency.
* However for this application it is simpler to use the ASCII character set.
* Since the index file is read-only, it can be compressed by omitting null
* child pointers at the start and end of arrays.
*/
/* Format of node offsets within index file */
enum node_offset {
INDEX_NODE_FLAGS = 0xF0000000, /* Flags in high nibble */
INDEX_NODE_PREFIX = 0x80000000,
INDEX_NODE_VALUES = 0x40000000,
INDEX_NODE_CHILDS = 0x20000000,
INDEX_NODE_MASK = 0x0FFFFFFF, /* Offset value */
};
void index_values_free(struct index_value *values)
{
while (values) {
struct index_value *value = values;
values = value->next;
free(value);
}
}
static int add_value(struct index_value **values,
const char *value, unsigned len, unsigned int priority)
{
struct index_value *v;
/* find position to insert value */
while (*values && (*values)->priority < priority)
values = &(*values)->next;
v = malloc(sizeof(struct index_value) + len + 1);
if (!v)
return -1;
v->next = *values;
v->priority = priority;
v->len = len;
memcpy(v->value, value, len);
v->value[len] = '\0';
*values = v;
return 0;
}
static void read_error(void)
{
fatal("Module index: unexpected error: %s\n"
"Try re-running depmod\n", errno ? strerror(errno) : "EOF");
}
static int read_char(FILE *in)
{
int ch;
errno = 0;
ch = getc_unlocked(in);
if (ch == EOF)
read_error();
return ch;
}
static uint32_t read_long(FILE *in)
{
uint32_t l;
errno = 0;
if (fread(&l, sizeof(uint32_t), 1, in) != sizeof(uint32_t))
read_error();
return ntohl(l);
}
static unsigned buf_freadchars(struct strbuf *buf, FILE *in)
{
unsigned i = 0;
int ch;
while ((ch = read_char(in))) {
if (!strbuf_pushchar(buf, ch))
break;
i++;
}
return i;
}
/*
* Index file searching
*/
struct index_node_f {
FILE *file;
char *prefix; /* path compression */
struct index_value *values;
unsigned char first; /* range of child nodes */
unsigned char last;
uint32_t children[0];
};
static struct index_node_f *index_read(FILE *in, uint32_t offset)
{
struct index_node_f *node;
char *prefix;
int i, child_count = 0;
if ((offset & INDEX_NODE_MASK) == 0)
return NULL;
if (fseek(in, offset & INDEX_NODE_MASK, SEEK_SET) < 0)
return NULL;
if (offset & INDEX_NODE_PREFIX) {
struct strbuf buf;
strbuf_init(&buf);
buf_freadchars(&buf, in);
prefix = strbuf_steal(&buf);
} else
prefix = NOFAIL(strdup(""));
if (offset & INDEX_NODE_CHILDS) {
char first = read_char(in);
char last = read_char(in);
child_count = last - first + 1;
node = NOFAIL(malloc(sizeof(struct index_node_f) +
sizeof(uint32_t) * child_count));
node->first = first;
node->last = last;
for (i = 0; i < child_count; i++)
node->children[i] = read_long(in);
} else {
node = NOFAIL(malloc(sizeof(struct index_node_f)));
node->first = INDEX_CHILDMAX;
node->last = 0;
}
node->values = NULL;
if (offset & INDEX_NODE_VALUES) {
int value_count;
struct strbuf buf;
const char *value;
unsigned int priority;
value_count = read_long(in);
strbuf_init(&buf);
while (value_count--) {
priority = read_long(in);
buf_freadchars(&buf, in);
value = strbuf_str(&buf);
add_value(&node->values, value, buf.used, priority);
strbuf_clear(&buf);
}
strbuf_release(&buf);
}
node->prefix = prefix;
node->file = in;
return node;
}
static void index_close(struct index_node_f *node)
{
free(node->prefix);
index_values_free(node->values);
free(node);
}
struct index_file {
FILE *file;
uint32_t root_offset;
};
struct index_file *index_file_open(const char *filename)
{
FILE *file;
uint32_t magic, version;
struct index_file *new;
file = fopen(filename, "re");
if (!file)
return NULL;
errno = EINVAL;
magic = read_long(file);
if (magic != INDEX_MAGIC) {
fclose(file);
return NULL;
}
version = read_long(file);
if (version >> 16 != INDEX_VERSION_MAJOR) {
fclose(file);
return NULL;
}
new = NOFAIL(malloc(sizeof(struct index_file)));
new->file = file;
new->root_offset = read_long(new->file);
errno = 0;
return new;
}
void index_file_close(struct index_file *idx)
{
fclose(idx->file);
free(idx);
}
static struct index_node_f *index_readroot(struct index_file *in)
{
return index_read(in->file, in->root_offset);
}
static struct index_node_f *index_readchild(const struct index_node_f *parent,
int ch)
{
if (parent->first <= ch && ch <= parent->last) {
return index_read(parent->file,
parent->children[ch - parent->first]);
}
return NULL;
}
static void index_dump_node(struct index_node_f *node, struct strbuf *buf,
int fd)
{
struct index_value *v;
int ch, pushed;
pushed = strbuf_pushchars(buf, node->prefix);
for (v = node->values; v != NULL; v = v->next) {
write_str_safe(fd, buf->bytes, buf->used);
write_str_safe(fd, " ", 1);
write_str_safe(fd, v->value, strlen(v->value));
write_str_safe(fd, "\n", 1);
}
for (ch = node->first; ch <= node->last; ch++) {
struct index_node_f *child = index_readchild(node, ch);
if (!child)
continue;
strbuf_pushchar(buf, ch);
index_dump_node(child, buf, fd);
strbuf_popchar(buf);
}
strbuf_popchars(buf, pushed);
index_close(node);
}
void index_dump(struct index_file *in, int fd, const char *prefix)
{
struct index_node_f *root;
struct strbuf buf;
root = index_readroot(in);
if (root == NULL)
return;
strbuf_init(&buf);
strbuf_pushchars(&buf, prefix);
index_dump_node(root, &buf, fd);
strbuf_release(&buf);
}
static char *index_search__node(struct index_node_f *node, const char *key, int i)
{
char *value;
struct index_node_f *child;
int ch;
int j;
while(node) {
for (j = 0; node->prefix[j]; j++) {
ch = node->prefix[j];
if (ch != key[i+j]) {
index_close(node);
return NULL;
}
}
i += j;
if (key[i] == '\0') {
value = node->values != NULL
? strdup(node->values[0].value)
: NULL;
index_close(node);
return value;
}
child = index_readchild(node, key[i]);
index_close(node);
node = child;
i++;
}
return NULL;
}
/*
* Search the index for a key
*
* Returns the value of the first match
*
* The recursive functions free their node argument (using index_close).
*/
char *index_search(struct index_file *in, const char *key)
{
// FIXME: return value by reference instead of strdup
struct index_node_f *root;
char *value;
root = index_readroot(in);
value = index_search__node(root, key, 0);
return value;
}
/* Level 4: add all the values from a matching node */
static void index_searchwild__allvalues(struct index_node_f *node,
struct index_value **out)
{
struct index_value *v;
for (v = node->values; v != NULL; v = v->next)
add_value(out, v->value, v->len, v->priority);
index_close(node);
}
/*
* Level 3: traverse a sub-keyspace which starts with a wildcard,
* looking for matches.
*/
static void index_searchwild__all(struct index_node_f *node, int j,
struct strbuf *buf,
const char *subkey,
struct index_value **out)
{
int pushed = 0;
int ch;
while (node->prefix[j]) {
ch = node->prefix[j];
strbuf_pushchar(buf, ch);
pushed++;
j++;
}
for (ch = node->first; ch <= node->last; ch++) {
struct index_node_f *child = index_readchild(node, ch);
if (!child)
continue;
strbuf_pushchar(buf, ch);
index_searchwild__all(child, 0, buf, subkey, out);
strbuf_popchar(buf);
}
if (node->values) {
if (fnmatch(strbuf_str(buf), subkey, 0) == 0)
index_searchwild__allvalues(node, out);
else
index_close(node);
} else {
index_close(node);
}
strbuf_popchars(buf, pushed);
}
/* Level 2: descend the tree (until we hit a wildcard) */
static void index_searchwild__node(struct index_node_f *node,
struct strbuf *buf,
const char *key, int i,
struct index_value **out)
{
struct index_node_f *child;
int j;
int ch;
while(node) {
for (j = 0; node->prefix[j]; j++) {
ch = node->prefix[j];
if (ch == '*' || ch == '?' || ch == '[') {
index_searchwild__all(node, j, buf,
&key[i+j], out);
return;
}
if (ch != key[i+j]) {
index_close(node);
return;
}
}
i += j;
child = index_readchild(node, '*');
if (child) {
strbuf_pushchar(buf, '*');
index_searchwild__all(child, 0, buf, &key[i], out);
strbuf_popchar(buf);
}
child = index_readchild(node, '?');
if (child) {
strbuf_pushchar(buf, '?');
index_searchwild__all(child, 0, buf, &key[i], out);
strbuf_popchar(buf);
}
child = index_readchild(node, '[');
if (child) {
strbuf_pushchar(buf, '[');
index_searchwild__all(child, 0, buf, &key[i], out);
strbuf_popchar(buf);
}
if (key[i] == '\0') {
index_searchwild__allvalues(node, out);
return;
}
child = index_readchild(node, key[i]);
index_close(node);
node = child;
i++;
}
}
/*
* Search the index for a key. The index may contain wildcards.
*
* Returns a list of all the values of matching keys.
*/
struct index_value *index_searchwild(struct index_file *in, const char *key)
{
struct index_node_f *root = index_readroot(in);
struct strbuf buf;
struct index_value *out = NULL;
strbuf_init(&buf);
index_searchwild__node(root, &buf, key, 0, &out);
strbuf_release(&buf);
return out;
}
/**************************************************************************/
/*
* Alternative implementation, using mmap to map all the file to memory when
* starting
*/
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
static const char _idx_empty_str[] = "";
struct index_mm {
struct kmod_ctx *ctx;
void *mm;
uint32_t root_offset;
size_t size;
};
struct index_mm_value {
unsigned int priority;
unsigned int len;
const char *value;
};
struct index_mm_value_array {
struct index_mm_value *values;
unsigned int len;
};
struct index_mm_node {
struct index_mm *idx;
const char *prefix; /* mmape'd value */
struct index_mm_value_array values;
unsigned char first;
unsigned char last;
uint32_t children[];
};
static inline uint32_t read_long_mm(void **p)
{
uint8_t *addr = *(uint8_t **)p;
uint32_t v;
/* addr may be unalined to uint32_t */
v = get_unaligned((uint32_t *) addr);
*p = addr + sizeof(uint32_t);
return ntohl(v);
}
static inline uint8_t read_char_mm(void **p)
{
uint8_t *addr = *(uint8_t **)p;
uint8_t v = *addr;
*p = addr + sizeof(uint8_t);
return v;
}
static inline char *read_chars_mm(void **p, unsigned *rlen)
{
char *addr = *(char **)p;
size_t len = *rlen = strlen(addr);
*p = addr + len + 1;
return addr;
}
static struct index_mm_node *index_mm_read_node(struct index_mm *idx,
uint32_t offset) {
void *p = idx->mm;
struct index_mm_node *node;
const char *prefix;
int i, child_count, value_count, children_padding;
uint32_t children[INDEX_CHILDMAX];
char first, last;
if ((offset & INDEX_NODE_MASK) == 0)
return NULL;
p = (char *)p + (offset & INDEX_NODE_MASK);
if (offset & INDEX_NODE_PREFIX) {
unsigned len;
prefix = read_chars_mm(&p, &len);
} else
prefix = _idx_empty_str;
if (offset & INDEX_NODE_CHILDS) {
first = read_char_mm(&p);
last = read_char_mm(&p);
child_count = last - first + 1;
for (i = 0; i < child_count; i++)
children[i] = read_long_mm(&p);
} else {
first = (char)INDEX_CHILDMAX;
last = 0;
child_count = 0;
}
children_padding = (sizeof(struct index_mm_node) +
(sizeof(uint32_t) * child_count)) % sizeof(void *);
if (offset & INDEX_NODE_VALUES)
value_count = read_long_mm(&p);
else
value_count = 0;
node = malloc(sizeof(struct index_mm_node)
+ sizeof(uint32_t) * child_count + children_padding
+ sizeof(struct index_mm_value) * value_count);
if (node == NULL)
return NULL;
node->idx = idx;
node->prefix = prefix;
if (value_count == 0)
node->values.values = NULL;
else {
node->values.values = (struct index_mm_value *)
((char *)node + sizeof(struct index_mm_node) +
sizeof(uint32_t) * child_count + children_padding);
}
node->values.len = value_count;
node->first = first;
node->last = last;
memcpy(node->children, children, sizeof(uint32_t) * child_count);
for (i = 0; i < value_count; i++) {
struct index_mm_value *v = node->values.values + i;
v->priority = read_long_mm(&p);
v->value = read_chars_mm(&p, &v->len);
}
return node;
}
static void index_mm_free_node(struct index_mm_node *node)
{
free(node);
}
struct index_mm *index_mm_open(struct kmod_ctx *ctx, const char *filename,
unsigned long long *stamp)
{
int fd;
struct stat st;
struct index_mm *idx;
struct {
uint32_t magic;
uint32_t version;
uint32_t root_offset;
} hdr;
void *p;
DBG(ctx, "file=%s\n", filename);
idx = malloc(sizeof(*idx));
if (idx == NULL) {
ERR(ctx, "malloc: %m\n");
return NULL;
}
if ((fd = open(filename, O_RDONLY|O_CLOEXEC)) < 0) {
DBG(ctx, "open(%s, O_RDONLY|O_CLOEXEC): %m\n", filename);
goto fail_open;
}
if (fstat(fd, &st) < 0)
goto fail_nommap;
if ((size_t) st.st_size < sizeof(hdr))
goto fail_nommap;
if ((idx->mm = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
== MAP_FAILED) {
ERR(ctx, "mmap(NULL, %"PRIu64", PROT_READ, %d, MAP_PRIVATE, 0): %m\n",
st.st_size, fd);
goto fail_nommap;
}
p = idx->mm;
hdr.magic = read_long_mm(&p);
hdr.version = read_long_mm(&p);
hdr.root_offset = read_long_mm(&p);
if (hdr.magic != INDEX_MAGIC) {
ERR(ctx, "magic check fail: %x instead of %x\n", hdr.magic,
INDEX_MAGIC);
goto fail;
}
if (hdr.version >> 16 != INDEX_VERSION_MAJOR) {
ERR(ctx, "major version check fail: %u instead of %u\n",
hdr.version >> 16, INDEX_VERSION_MAJOR);
goto fail;
}
idx->root_offset = hdr.root_offset;
idx->size = st.st_size;
idx->ctx = ctx;
close(fd);
*stamp = stat_mstamp(&st);
return idx;
fail:
munmap(idx->mm, st.st_size);
fail_nommap:
close(fd);
fail_open:
free(idx);
return NULL;
}
void index_mm_close(struct index_mm *idx)
{
munmap(idx->mm, idx->size);
free(idx);
}
static struct index_mm_node *index_mm_readroot(struct index_mm *idx)
{
return index_mm_read_node(idx, idx->root_offset);
}
static struct index_mm_node *index_mm_readchild(const struct index_mm_node *parent,
int ch)
{
if (parent->first <= ch && ch <= parent->last) {
return index_mm_read_node(parent->idx,
parent->children[ch - parent->first]);
}
return NULL;
}
static void index_mm_dump_node(struct index_mm_node *node, struct strbuf *buf,
int fd)
{
struct index_mm_value *itr, *itr_end;
int ch, pushed;
pushed = strbuf_pushchars(buf, node->prefix);
itr = node->values.values;
itr_end = itr + node->values.len;
for (; itr < itr_end; itr++) {
write_str_safe(fd, buf->bytes, buf->used);
write_str_safe(fd, " ", 1);
write_str_safe(fd, itr->value, itr->len);
write_str_safe(fd, "\n", 1);
}
for (ch = node->first; ch <= node->last; ch++) {
struct index_mm_node *child = index_mm_readchild(node, ch);
if (child == NULL)
continue;
strbuf_pushchar(buf, ch);
index_mm_dump_node(child, buf, fd);
strbuf_popchar(buf);
}
strbuf_popchars(buf, pushed);
index_mm_free_node(node);
}
void index_mm_dump(struct index_mm *idx, int fd, const char *prefix)
{
struct index_mm_node *root;
struct strbuf buf;
root = index_mm_readroot(idx);
if (root == NULL)
return;
strbuf_init(&buf);
strbuf_pushchars(&buf, prefix);
index_mm_dump_node(root, &buf, fd);
strbuf_release(&buf);
}
static char *index_mm_search_node(struct index_mm_node *node, const char *key,
int i)
{
char *value;
struct index_mm_node *child;
int ch;
int j;
while(node) {
for (j = 0; node->prefix[j]; j++) {
ch = node->prefix[j];
if (ch != key[i+j]) {
index_mm_free_node(node);
return NULL;
}
}
i += j;
if (key[i] == '\0') {
value = node->values.len > 0
? strdup(node->values.values[0].value)
: NULL;
index_mm_free_node(node);
return value;
}
child = index_mm_readchild(node, key[i]);
index_mm_free_node(node);
node = child;
i++;
}
return NULL;
}
/*
* Search the index for a key
*
* Returns the value of the first match
*
* The recursive functions free their node argument (using index_close).
*/
char *index_mm_search(struct index_mm *idx, const char *key)
{
// FIXME: return value by reference instead of strdup
struct index_mm_node *root;
char *value;
root = index_mm_readroot(idx);
value = index_mm_search_node(root, key, 0);
return value;
}
/* Level 4: add all the values from a matching node */
static void index_mm_searchwild_allvalues(struct index_mm_node *node,
struct index_value **out)
{
struct index_mm_value *itr, *itr_end;
itr = node->values.values;
itr_end = itr + node->values.len;
for (; itr < itr_end; itr++)
add_value(out, itr->value, itr->len, itr->priority);
index_mm_free_node(node);
}
/*
* Level 3: traverse a sub-keyspace which starts with a wildcard,
* looking for matches.
*/
static void index_mm_searchwild_all(struct index_mm_node *node, int j,
struct strbuf *buf,
const char *subkey,
struct index_value **out)
{
int pushed = 0;
int ch;
while (node->prefix[j]) {
ch = node->prefix[j];
strbuf_pushchar(buf, ch);
pushed++;
j++;
}
for (ch = node->first; ch <= node->last; ch++) {
struct index_mm_node *child = index_mm_readchild(node, ch);
if (!child)
continue;
strbuf_pushchar(buf, ch);
index_mm_searchwild_all(child, 0, buf, subkey, out);
strbuf_popchar(buf);
}
if (node->values.len > 0) {
if (fnmatch(strbuf_str(buf), subkey, 0) == 0)
index_mm_searchwild_allvalues(node, out);
else
index_mm_free_node(node);
} else {
index_mm_free_node(node);
}
strbuf_popchars(buf, pushed);
}
/* Level 2: descend the tree (until we hit a wildcard) */
static void index_mm_searchwild_node(struct index_mm_node *node,
struct strbuf *buf,
const char *key, int i,
struct index_value **out)
{
struct index_mm_node *child;
int j;
int ch;
while(node) {
for (j = 0; node->prefix[j]; j++) {
ch = node->prefix[j];
if (ch == '*' || ch == '?' || ch == '[') {
index_mm_searchwild_all(node, j, buf,
&key[i+j], out);
return;
}
if (ch != key[i+j]) {
index_mm_free_node(node);
return;
}
}
i += j;
child = index_mm_readchild(node, '*');
if (child) {
strbuf_pushchar(buf, '*');
index_mm_searchwild_all(child, 0, buf, &key[i], out);
strbuf_popchar(buf);
}
child = index_mm_readchild(node, '?');
if (child) {
strbuf_pushchar(buf, '?');
index_mm_searchwild_all(child, 0, buf, &key[i], out);
strbuf_popchar(buf);
}
child = index_mm_readchild(node, '[');
if (child) {
strbuf_pushchar(buf, '[');
index_mm_searchwild_all(child, 0, buf, &key[i], out);
strbuf_popchar(buf);
}
if (key[i] == '\0') {
index_mm_searchwild_allvalues(node, out);
return;
}
child = index_mm_readchild(node, key[i]);
index_mm_free_node(node);
node = child;
i++;
}
}
/*
* Search the index for a key. The index may contain wildcards.
*
* Returns a list of all the values of matching keys.
*/
struct index_value *index_mm_searchwild(struct index_mm *idx, const char *key)
{
struct index_mm_node *root = index_mm_readroot(idx);
struct strbuf buf;
struct index_value *out = NULL;
strbuf_init(&buf);
index_mm_searchwild_node(root, &buf, key, 0, &out);
strbuf_release(&buf);
return out;
}