src/crelude/common.c

Source code

#include "common.h"
#include "io.h"
#include "utf.h"

#include <assert.h>

#ifndef IMPLEMENTATION

inline u0 *or(const u0 *nullable, const u0 *nonnull)
{
    if (nullable == nil)
        return (u0 *)nonnull;
    return (u0 *)nullable;
}

bool is_zero(imax n)
{ return n == 0; }

bool is_zerof(f64 n)
{ return n == 0.0; }

bool is_zeroed(u0 *ptr, usize width)
{
    umin *xs = (umin *)ptr;
    until (width-- == 0) unless (*xs++ == 0) return false;
    return true;
}

u0 zero(u0 *blk, usize width)
{
    if (blk == nil || width == 0)
        return UNIT;
    umin *bytes = blk;
    until (width-- == 0)
        *bytes++ = 0;
    return UNIT;
}

u0 *emalloc(usize len, usize size)
{
    usize bytes = len * size;
    u0 *m = MALLOC(bytes);
    if (m == nil)
        PANIC("Could not allocate %zu bytes.", bytes);
    zero(m, bytes);
    return m;
}
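
/* Example (illustrative): allocate a zero-initialised buffer and verify
 * it with `is_zeroed`.
 *
 *     u64 *xs = emalloc(8, sizeof(u64));       // eight zeroed words.
 *     assert(is_zeroed(xs, 8 * sizeof(u64)));
 *     FREE(xs);
 */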

/* in-place */
u0 reverse(u0 *self, usize width)
{
    umin chunk[width];  //< C99 variable-length array.
    MemArray *arr = self;
    umin *ptr = arr->value;
    usize len = arr->len * width;

    for (usize i = 0, j = len - 1; i < j; ++i, --j) {
        memcpy(  chunk, ptr + i, width);
        memcpy(ptr + i, ptr + j, width);
        memcpy(ptr + j,   chunk, width);
    }

    return UNIT;
}

MemSlice reverse_endianness(MemSlice bytes)
{
    MemSlice copy;
    copy = COPY(bytes);

    for (usize i = 0, j = copy.len - 1; i < j; ++i, --j) {
        umin temp = NTH(copy, i);
        NTH(copy, i) = NTH(copy, j);
        NTH(copy, j) = temp;
    }

    return copy;
}

bool is_little_endian()
{
    uword w = 0x01;
    return *(umin *)&w == 1;
}

u128 big_endian(umin *start, usize bytes)
{
    assert(bytes <= sizeof(u128));
    // Write to offset in word (big endian), then reverse
    // depending on endianness, e.g., [0x32][0xF1] becomes:
    //   [0x00][0x00][0x32][0xF1]  (big endian), or
    //   [0xF1][0x32][0x00][0x00]  (little endian)
    umin mem[sizeof(u128)];
    memset(mem, 0, sizeof(u128));
    memcpy(mem + sizeof(u128) - bytes, start, bytes);

    MemSlice reversed = VIEW(MemSlice, mem, 0, sizeof(u128));
    if (is_little_endian())  // reverse `mem` if little endian.
        reverse(&reversed, 1);

    return *(u128 *)mem;
}
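
/* Example (illustrative): the byte sequence [0x01][0x02] read as a
 * big-endian integer is 0x0102 == 258, regardless of the host's native
 * byte order.
 *
 *     umin raw[2] = { 0x01, 0x02 };
 *     u128 n = big_endian(raw, 2);  // n == 0x0102.
 */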

u0 memswap(umin *a, umin *b, usize bytes)
{
    usize words = bytes / WORD_SIZE;
    usize trail = words * WORD_SIZE;  //< Where left-over bytes start.

    // Swap word-sized blocks first.
    for (usize i = 0; i < trail; i += WORD_SIZE) {
        uword tmp;
        memcpy( &tmp, a + i, WORD_SIZE);
        memcpy(a + i, b + i, WORD_SIZE);
        memcpy(b + i,  &tmp, WORD_SIZE);
    }
    // Swap remaining bytes, byte-by-byte.
    for (usize i = trail; i < bytes; ++i) {
        umin tmp = a[i];
        a[i] = b[i];
        b[i] = tmp;
    }
}
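
/* Example (illustrative): swapping two equally sized buffers in place.
 *
 *     byte a[] = "hello", b[] = "world";
 *     memswap((umin *)a, (umin *)b, 5);
 *     // `a` now reads "world" and `b` reads "hello".
 */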

static u0 swap_blocks(MemSlice block, usize pivot)
{  // Tail-recursion should be optimised here.
    if (pivot == 0 || pivot == block.len)
        return;  // We are already done.

    MemSlice A = SLICE(MemSlice, block, 0, pivot);
    MemSlice B = SLICE(MemSlice, block, pivot, -1);
    MemSlice A1, A2;
    MemSlice B1, B2;

    if (A.len < B.len) {
        B1 = SLICE(MemSlice, B, 0, -A.len - 1);
        B2 = SLICE(MemSlice, B, B1.len, -1);
        assert(A.len == B2.len);

        memswap(A.value, B2.value, A.len);
        B = SLICE(MemSlice, block, 0, B.len);
        swap_blocks(B, pivot);
    } else if (A.len > B.len) {
        A1 = SLICE(MemSlice, A, 0, B.len);
        A2 = SLICE(MemSlice, A, A1.len, -1);
        assert(B.len == A1.len);
        assert(pivot == A1.len + A2.len);

        memswap(B.value, A1.value, B.len);
        A = SLICE(MemSlice, block, B.len, -1);
        swap_blocks(A, pivot - B.len);
    } else {
        // Same length, and thus trivial.
        memswap(A.value, B.value, A.len);
    }
}

u0 swap(u0 *self, usize pivot, usize width)
{
    // Deal with everything in terms of bytes.
    MemSlice blk = *(MemSlice *)self;
    blk.len *= width;
    usize pos = pivot * width;
    swap_blocks(blk, pos);
}
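
/* Example (illustrative, assuming the VIEW semantics used in
 * `big_endian` above): rotating a byte buffer about index 2.
 *
 *     byte cs[] = "abcde";
 *     MemSlice view = VIEW(MemSlice, (umin *)cs, 0, 5);
 *     swap(&view, 2, 1);
 *     // `cs` now reads "cdeab".
 */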

usize resize(u0 *self, usize cap, usize width)
{
    MemArray *arr = self;
    if (arr->cap == cap)
        return arr->cap - arr->len;
    // Lowering the capacity; best done by REALLOC.
    if (cap < arr->cap) {
        arr->value = REALLOC(arr->value, cap * width);
    } else {  // Growing the capacity; must zero bytes.
        umin *new = emalloc(cap, width);
        memcpy(new, arr->value, arr->cap * width);
        FREE(arr->value);
        arr->value = new;
    }

    arr->cap = cap;
    return arr->cap - arr->len;
}

usize grow(u0 *self, usize count, usize width)
{
    MemArray *arr = self;
    usize old_cap = arr->cap;
    usize new_cap = arr->cap;

    if (arr->len + count > arr->cap) {  // reallocate array.
        new_cap = (usize)(arr->cap * ARRAY_REALLOC_FACTOR) + count;
        resize(arr, new_cap, width);
    }

    arr->len += count;
    return new_cap - old_cap;
}
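
/* Note: `resize` returns the free capacity remaining after the call
 * (cap - len), whereas `grow` returns only the extra capacity gained,
 * which is zero when the existing allocation already had room.
 */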

u0 *get(u0 *self, usize index, usize width)
{
    MemArray *arr = self;
    if (index >= arr->len) return nil;

    return arr->value + index * width;
}

u0 *set(u0 *self, usize index, const u0 *elem, usize width)
{
    MemArray *arr = self;
    if (index >= arr->len || elem == nil) return nil;

    memcpy(arr->value + index * width, elem, width);
    return arr->value + index * width;
}

usize push(u0 *restrict self, const u0 *restrict element, usize width)
{
    if (element == nil) return 0;

    MemArray *arr = (MemArray *restrict)self;
    umin *elem = (umin *restrict)element;

    usize growth = grow(arr, 1, width);
    memcpy(arr->value + width * (arr->len - 1), elem, width);
    return growth;
}
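
/* Example (illustrative; `AMAKE(type, capacity)` is assumed to build an
 * empty array of the given element type, as used in `associate` below):
 *
 *     GenericArray xs = AMAKE(u64, 4);
 *     u64 x = 42;
 *     push(&xs, &x, sizeof(u64));
 *     assert(*(u64 *)get(&xs, 0, sizeof(u64)) == 42);
 *     FREE_INSIDE(xs);
 */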

usize insert(u0 *restrict self, usize index,
             const u0 *restrict element, usize width)
{
    if (element == nil) return 0;

    MemArray *arr = (MemArray *restrict)self;
    umin *elem = (umin *restrict)element;
    usize growth = grow(arr, 1, width);

    umin *gap = arr->value + width * index;
    // Offset elements down by one, from insertion index,
    // leaving us with a gap for insertion.
    memmove(gap + width, gap, width * (arr->len - 1 - index));
    // Insert element into the empty space.
    memcpy(gap, elem, width);

    return growth;
}

usize extend(u0 *restrict self, const u0 *restrict slice, usize width)
{
    if (slice == nil) return 0;

    MemArray *arr = (MemArray *restrict)self;
    MemSlice *sub = (MemSlice *restrict)slice;

    if (sub->len == 0) return 0;

    usize end = arr->len;  //< old array length.
    usize growth = grow(arr, sub->len, width);

    memcpy(arr->value + width * end, sub->value, width * sub->len);
    return growth;
}

usize splice(u0 *restrict self, usize index, const u0 *restrict slice, usize width)
{
    if (slice == nil) return 0;

    MemArray *arr = (MemArray *restrict)self;
    MemSlice *sub = (MemSlice *restrict)slice;

    if (sub->len == 0) return 0;

    usize end = arr->len;
    usize growth = grow(arr, sub->len, width);
    umin *gap = arr->value + width * index;
    // Offset elements down by the slice length, from insertion index,
    // leaving us with a `width * sub.len` byte gap for insertion.
    memmove(gap + width * sub->len, gap, width * (end - index));
    // Insert slice into empty space.
    memcpy(gap, sub->value, sub->len * width);

    return growth;
}
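
/* Example (illustrative): splicing the slice { 8, 9 } into the array
 * { 1, 2, 3 } at index 1 yields { 1, 8, 9, 2, 3 }; the return value is
 * the extra capacity (if any) allocated to make room.
 */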

GenericSlice cut(u0 *self, usize from, isize upto, usize width)
{
    GenericArray *arr_ptr = self;
    MemArray arr = *(MemArray *)self;

    usize final = upto < 0 ? arr.len + upto : (usize)upto;
    assert(final < arr.len);

    if (from > final) {
        usize tmp = from;
        from = final;
        final = tmp;
    }
    usize pivot = final - from + 1;

    // Deal with bytes instead.
    arr.len *= width;
    arr.cap *= width;
    from *= width;
    final *= width;
    pivot *= width;

    MemSlice tail = SLICE(MemSlice, arr, from, arr.len);
    swap_blocks(tail, pivot);
    tail = SLICE(MemSlice, tail, tail.len - pivot, -1);

    u0 *tail_ptr = &tail;
    GenericSlice trailing = *(GenericSlice *)tail_ptr;
    trailing.len /= width;
    arr_ptr->len -= pivot / width;  // Capacity the same.
    return trailing;  // Return removed slice which is left over at end.
}
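
/* Example (illustrative): `cut` removes positions `from` through `upto`
 * (inclusive; a negative `upto` counts back from the end) and returns
 * the removed elements as a slice aliasing the tail of the array's
 * buffer.  Given an array holding { 1, 2, 3, 4, 5 }:
 *
 *     GenericSlice removed = cut(&arr, 1, 3, sizeof(u64));
 *     // The array now holds { 1, 5 } (len == 2),
 *     // and `removed` views { 2, 3, 4 }.
 */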

u0 *pop(u0 *self, usize width)
{
    MemArray *arr = self;
    assert(arr->len > 0);
    return (u0 *)(arr->value + --arr->len * width);
}

u0 *shift(u0 *self, usize width)
{
    MemSlice *arr = self;
    assert(arr->len > 0);

    swap(self, 1, width);
    return (u0 *)(arr->value + --arr->len * width);
}

usize null(u0 *self, usize width)
{
    GenericArray arr = *(GenericArray *)self;
    usize bytes = arr.cap * width;
    zero(PTR(arr), bytes);
    return bytes;
}

string from_cstring(const byte *cstring)
{
    return VIEW(string, (byte *)cstring, 0, strlen(cstring));
}
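
/* Note: the returned string is a view over the C string's own bytes
 * (no copy is made), and its length excludes the terminating NUL.
 */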

bool string_eq(string self, const string other)
{
    unless (self.len == other.len)
        return false;
    else if (self.value == other.value)
        return true;

    foreach (c, self)
        if (c != NTH(other, it.index))
            return false;
    return true;
}

i16 string_ncmp(const string self, const string other, usize n)
{ return strncmp(UNWRAP(self), UNWRAP(other), n); }

i16 string_cmp(const string self, const string other)
{
    byte *ptr0 = self.value,
         *ptr1 = other.value;

    if (ptr0 == ptr1) {
        if (self.len == other.len) return 0;
        return self.len > other.len
            ? ptr0[other.len] - 0
            : 0 - ptr1[self.len];
    }

    byte c0, c1;
    usize len = min(self.len, other.len);
    for (usize i = 0; i < len; ++i)
        if ((c0 = ptr0[i]) != (c1 = ptr1[i]))
            return c0 - c1;

    if (self.len == other.len) return 0;
    if (self.len == len)
        return 0 - ptr1[len];
    return ptr0[len] - 0;
}

u64 hash_bytes(MemSlice mem)
{
    u64 hash = 5381;
    foreach (c, mem)
        hash += c + (hash << 5);

    return hash;
}

u64 hash_string(string str)
{ return hash_bytes(TO_BYTES(str)); }
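
/* Note: `hash_bytes` is the classic djb2 hash: since
 * hash + c + (hash << 5) == hash * 33 + c, each byte folds in as
 * hash = hash * 33 + c, starting from the seed 5381.
 */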

static u64 node_hash(const u0 *self, const u0 *node)
{
    const GenericMap *map = self;
    return *(u64 *)((umin *)node + map->hash_offset);
}
static u0 *node_key(const u0 *self, const u0 *node)
{
    const GenericMap *map = self;
    return (umin *)node + map->key_offset;
}
static u0 *node_value(const u0 *self, const u0 *node)
{
    const GenericMap *map = self;
    return (umin *)node + map->value_offset;
}
static u0 *node_next(const u0 *self, const u0 *node)
{
    const GenericMap *map = self;
    return (umin *)node + map->next_offset;
}

static usize bucket_index(const u0 *self, u64 hash)
{
    const GenericMap *map = self;
    return (usize)hash % map->buckets.cap;
}

// TODO: right now we check proper equality, maybe just use the hash,
//       and don't worry about hash-collisions?  `u64` is quite large after all.
static bool key_eq(const u0 *self, const u0 *key0, const u0 *key1)
{
    if (key0 == key1) return true;
    const GenericMap *map = self;

    switch (map->key_type) {
    case HKT_STRING:
    case HKT_MEM_SLICE:;
        const string *s0 = key0, *s1 = key1;
        return string_eq(*s0, *s1);
    case HKT_RUNIC:;
        const runic *r0 = key0, *r1 = key1;
        MemSlice _m0 = TO_BYTES(*r0), _m1 = TO_BYTES(*r1);
        MemSlice *m0 = &_m0, *m1 = &_m1;
        return string_eq(*(string *)m0, *(string *)m1);
    case HKT_CSTRING:
        return 0 == strcmp(*(byte **)key0, *(byte **)key1);
    case HKT_SMALL_INTEGER:
    case HKT_RAW_BYTES:
        return 0 == memcmp(key0, key1, map->key_size);
    }

    return PANIC("Improper hash-map key_type."), false;
}

usize init_hashnode(u0 *node, const u0 *_map, u64 hash, const u0 *key, const u0 *value)
{
    GenericMap *map = (u0 *)_map;
    umin *ptr = node;

    memcpy(ptr + map-> hash_offset, &hash, sizeof(u64));
    memcpy(ptr + map->  key_offset,   key, map->key_size);
    memcpy(ptr + map->value_offset, value, map->value_size);
    memset(ptr + map-> next_offset,     0, sizeof(u0 *));  //< NULL-pointing.

    return map->node_size;
}

u0 associate(u0 *self, const u0 *key, const u0 *value)
{
    GenericMap *map = self;
    const usize NODE_SIZE = map->node_size;

    u64 hash = map->hasher(key, map->key_size);
    usize index = bucket_index(map, hash);

    u0 *head = (umin *)PTR(map->buckets) + index * NODE_SIZE;
    u0 *node = head;  /*< type of `hashnode(K, V)`. */
    u0 *last = nil;
    // Walk up node-chain trying to find if key is already present.
    until (node == nil || node_hash(map, node) == 0) {  // zero-hash = unpopulated.
        if (key_eq(map, node_key(map, node), key)) {
            // Found node with equal key, so rewrite value.
            memcpy(node_value(map, node), value, map->value_size);
            return UNIT;
        }
        last = node;
        node = *(u0 **)node_next(map, node);
    }
    // Otherwise, insert the new key-value pair into the chain.
    assert(node == nil || node_hash(map, node) == 0);
    ++map->len;

    u0 *new = emalloc(1, NODE_SIZE);
    init_hashnode(new, map, hash, key, value);

    if (last == nil) {  // i.e. chain hasn't started.
        assert(node == head);
        memcpy(head, new, NODE_SIZE);
        grow(&map->buckets, 1, NODE_SIZE);
        FREE(new);
    } else {  // otherwise, make the node part of the linked list.
        u0 **last_next = node_next(map, last);
        assert(*last_next == nil);
        *last_next = new;
    }

    const f64 LOAD_FACTOR = (f64)map->len / map->buckets.cap;
    if (LOAD_FACTOR < HASHMAP_LOAD_THRESHOLD)
        return UNIT;
    // ^ If the load factor (entries per potential bucket) exceeds the
    // threshold (~85%), grow the bucket array (roughly doubling it) to
    // keep the chains from getting too long and lookups too slow.

    GenericArray snodes = AMAKE(umin[NODE_SIZE], map->len);
    for (usize i = 0; i < map->buckets.cap; ++i) {
        u0 *snode = (umin *)PTR(map->buckets) + i * NODE_SIZE;
        if (node_hash(map, snode) == 0) continue;
        bool first = true;
        until (snode == nil) {
            u0 **next_field = node_next(map, snode);
            u0 *next = *next_field;
            *next_field = nil;
            push(&snodes, snode, NODE_SIZE);
            // The first node lives inside the bucket array itself; later
            // nodes in the chain are heap-allocated and can be freed now
            // that `snodes` holds a copy.
            if (!first) FREE(snode);
            first = false;
            snode = next;
        }
    }
    // Having copied every node into `snodes`, blank out the bucket array
    // and repopulate it at the new, correct indices.
    resize(&map->buckets,
           CEIL(usize, HASHMAP_GROWTH_FACTOR
             * (LOAD_FACTOR / HASHMAP_LOAD_THRESHOLD)
             * map->buckets.cap),
           NODE_SIZE);
    null(&map->buckets, NODE_SIZE);
    // repopulate.
    umin *bucket = (u0 *)PTR(map->buckets);
    for (usize i = 0; i < snodes.len; ++i) {
        u0 *snode = get(&snodes, i, NODE_SIZE);
        u64 shash = node_hash(map, snode);
        usize sindex = bucket_index(map, shash);
        u0 *bnode = bucket + sindex * NODE_SIZE;
        // copy directly into bucket array.
        if (node_hash(map, bnode) == 0) {
            memcpy(bnode, snode, NODE_SIZE);
            continue;
        }
        // otherwise, append to chain.
        until (*(u0 **)node_next(map, bnode) == nil)
            bnode = *(u0 **)node_next(map, bnode);
        assert(bnode != nil);
        // put `snode` on heap, and link up pointer to it.
        u0 *new_snode = emalloc(1, NODE_SIZE);
        memcpy(new_snode, snode, NODE_SIZE);
        u0 **next_field = node_next(map, bnode);
        *next_field = new_snode;
    }
    FREE_INSIDE(snodes);

    return UNIT;
}

u0 *lookup(u0 *self, const u0 *key)
{
    GenericMap *map = self;
    u64 hash = map->hasher(key, map->key_size);
    usize index = bucket_index(map, hash);

    u0 *head = (umin *)PTR(map->buckets) + index * map->node_size;
    // search hashnode-chain.
    until (head == nil || node_hash(map, head) == 0) {
        if (key_eq(map, node_key(map, head), key))
            return node_value(map, head);
        head = *(u0 **)node_next(map, head);
    }

    return nil;
}

bool drop(u0 *self, const u0 *key)
{
    GenericMap *map = self;
    u64 hash = map->hasher(key, map->key_size);
    usize index = bucket_index(map, hash);

    u0 *head = (umin *)PTR(map->buckets) + index * map->node_size;
    u0 *node = head;
    u0 *last = nil;
    // Search the hashnode-chain.
    until (node == nil || node_hash(map, node) == 0) {
        if (key_eq(map, node_key(map, node), key))
            break;
        last = node;
        node = *(u0 **)node_next(map, node);
    }

    if (node == nil || node_hash(map, node) == 0)
        // i.e. loop did not break, and
        // so the key does not exist.
        return false;

    if (node == head) {  // delete from bucket array directly.
        assert(last == nil);
        u0 *next = *(u0 **)node_next(map, node);
        if (next == nil) {
            zero(node, map->node_size);
            --map->buckets.len;
        } else {
            // otherwise, copy the next node into the bucket array,
            // overwriting the node that we are deleting.
            memcpy(node, next, map->node_size);
            FREE(next);  //< it is now in the base of the bucket array.
        }
    } else {
        // otherwise, re-order the pointers and free the node.
        assert(last != nil);
        u0 **last_next_field = node_next(map, last);
        u0 **node_next_field = node_next(map, node);
        *last_next_field = *node_next_field;
        FREE(node);
    }

    --map->len;
    return true;
}

GenericSlice get_keys(u0 *self)
{
    GenericMap *map = self;
    GenericSlice ks = {
        .len = map->len,
        .value = emalloc(map->len, map->key_size)
    };

    usize j = 0;
    for (usize i = 0; i < map->buckets.cap; ++i) {
        u0 *node = (umin *)PTR(map->buckets) + i * map->node_size;
        if (node_hash(map, node) == 0) continue;

        until (node == nil) {
            set(&ks, j++, node_key(map, node), map->key_size);
            node = *(u0 **)node_next(map, node);
        }
    }
    assert(j == map->len);
    return ks;
}

bool has_key(u0 *self, u0 *key)
{
    GenericMap *map = self;
    for (usize i = 0; i < map->buckets.cap; ++i) {
        u0 *node = (umin *)PTR(map->buckets) + i * map->node_size;
        if (node_hash(map, node) == 0) continue;

        until (node == nil) {
            if (key_eq(map, node_key(map, node), key))
                return true;
            node = *(u0 **)node_next(map, node);
        }
    }
    return false;
}
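
/* Example (illustrative; assumes `map` is a hash-map with string keys
 * and u64 values, constructed elsewhere with the library's map-making
 * macro, which is not defined in this file):
 *
 *     string key = from_cstring("answer");
 *     u64 value = 42;
 *     associate(&map, &key, &value);
 *     u64 *found = lookup(&map, &key);
 *     assert(found != nil && *found == 42);
 *     drop(&map, &key);
 *     assert(!has_key(&map, &key));
 */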

u0 empty_map(u0 *self)
{
    GenericMap *map = self;
    for (usize i = 0; i < map->buckets.cap; ++i) {
        u0 *node = (umin *)PTR(map->buckets) + i * map->node_size;
        if (node_hash(map, node) == 0) continue;

        node = *(u0 **)node_next(map, node);
        until (node == nil) {
            u0 *next = *(u0 **)node_next(map, node);
            FREE(node);
            node = next;
        }
    }
    zero(PTR(map->buckets), map->node_size * map->buckets.cap);
    return UNIT;
}

bool is_empty_map(u0 *self)
{
    GenericMap *map = self;
    if (PTR(map->buckets) == nil) return true;

    for (usize i = 0; i < map->buckets.cap; ++i) {
        u0 *node = (umin *)PTR(map->buckets) + i * map->node_size;
        if (node_hash(map, node) != 0) return false;
    }

    return true;
}

u0 free_map(u0 *self)
{
    GenericMap *map = self;
    empty_map(map);
    FREE_INSIDE(map->buckets);
    map->buckets.value = nil;
    return UNIT;
}

u0 dump_hashmap(u0 *self, byte *key_fmt, byte *value_fmt)
{
    GenericMap *map = self;
    const usize NODE_SIZE = map->node_size;
    umin *buckets = (u0 *)PTR(map->buckets);
    eprintln("entries:     %zu", map->len);
    eprintln("buckets.cap: %zu", map->buckets.cap);
    eprintln("buckets.len: %zu", map->buckets.len);

    for (usize i = 0; i < map->buckets.cap; ++i) {
        u0 *node = buckets + i * NODE_SIZE;
        u64 bhash = node_hash(map, node);

        eprint("| %02zu |", i);
        if (bhash == 0) {
            eprintln(" -x-");
            continue;
        }

        until (node == nil) {
            struct { umin _[16]; } key = { 0 }, value = { 0 };
            memcpy(&key, node_key(map, node), map->key_size);
            memcpy(&value, node_value(map, node), map->value_size);
            u64 hash = node_hash(map, node);
            string formatter = sprint(" -> [%s (%06llX): %s]",
                                      key_fmt, hash, value_fmt);
            eprint(PTR(formatter), key, value);
            node = *(u0 **)node_next(map, node);
        }
        eprint("\n");
    }
}

#endif
