Associative arrays/Creation/C

From Rosetta Code
Revision as of 17:24, 20 August 2011 by rosettacode>Kernigh (Split from Associative arrays/Creation and Associative arrays/Iteration, to make room for expansion. This text has multiple authors.)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Associative arrays/Creation/C is part of Associative arrays/Creation. You may find other members of Associative arrays/Creation at Category:Associative arrays/Creation.
Associative arrays/Creation/C is part of Associative arrays/Iteration. You may find other members of Associative arrays/Iteration at Category:Associative arrays/Iteration.

There are no associative arrays in the C language. Some libraries provide hash tables, red-black trees, or other data structures that can become associative arrays. Some of these libraries are awful. There are too many easy ways to get a compiler error or a segfault.

Libraries

Judy

Example using Judy.

Library: Judy

<lang c>#include <stdio.h>

  1. include <Judy.h>

int main() {

 Pvoid_t assarray = (Pvoid_t) NULL;
 PWord_t value;
 int retval;
 /* populating */
 JSLI(value, assarray, "red");
 *value = 0xff0000;
 JSLI(value, assarray, "green");
 *value = 0x00ff00;
 JSLI(value, assarray, "blue");
 *value = 0x0000ff;
 /* retrieving existent */
 JSLG(value, assarray, "blue");
 printf("blue is #%06lx\n", (unsigned long)*value);
 /* unknown key */
 JSLG(value, assarray, "nonexistingkey");
 if ( value == NULL ) { fprintf(stderr, "key 'nonexistingkey' does not exists\n"); }
 /* deleting */
 JSLD(retval, assarray, "red");
 JSLG(value, assarray, "red");
 if ( value == NULL ) { fprintf(stderr, "key red does not exist anymore\n"); }
 JudySLFreeArray(&assarray, PJE0);
 return 0;

}</lang>

Library: Judy

We can easily iterate over pair of keys (indexes) and values.

<lang c>#include <stdio.h>

  1. include <Judy.h>
  1. define MAXLINELEN 256

int main() {

 Pvoid_t assoc_arr = (Pvoid_t) NULL;
 PWord_t val;
 uint8_t idx[MAXLINELEN];
 // insert some values
 JSLI(val, assoc_arr, "hello");
 *val = 4;
 JSLI(val, assoc_arr, "world");
 *val = 8;
 JSLI(val, assoc_arr, "!");
 *val = 16;
 // iterate over indexes-values
 idx[0] = '\0';
 JSLF(val, assoc_arr, idx);
 while(val != NULL) {
   printf("'%s' -> %d\n", idx, *val);
   JSLN(val, assoc_arr, idx);
 }
 JudySLFreeArray(&assoc_arr, PJE0);
 return 0;

}</lang>

POSIX hcreate()

POSIX defines hcreate(), hdestroy() and hsearch() to manage a hash table. If you have a Unix system or clone, then your libc probably has these functions, so there is no extra library to install.

  • You can only have one hash table, in the entire program!
    • With GNU libc, you can call hcreate_r() to have more than one hash table.
  • There is no way to delete an entry from the table!
  • There is no way to iterate all keys in the table!
  • Every key must be a NUL-terminated string.
  • Every value is a void *, needs a cast to the correct type.
  • The table might become full, and refuse to accept more entries. If you use hcreate(50) then the table might hold at most 50 entries.
  • hdestroy() is almost impossible to use. Many programs keep the table and never call hdestroy().
    • With BSD libc, hdestroy() will call free() with each key in the table.
    • Else the program leaks memory, because there is no way to iterate the keys to free them.

The Linux manual page hcreate(3) has a short example.

The next example shows how to use hdestroy(), how to use intptr_t as a value, how to use double as a key, and how to pretend to delete an entry from the table! POSIX hash table is awful, and the code is too long, but the program does run.

Library: POSIX

<lang c>#include <inttypes.h> /* PRIxPTR */

  1. include <search.h> /* hcreate(), hsearch(), hdestroy() */
  2. include <stdint.h> /* intptr_t */
  3. include <stdio.h> /* perror(), printf(), puts(), snprintf() */
  4. include <stdlib.h> /* calloc(), exit(), free() */
  5. include <string.h> /* strdup() */

void fail(char *message) { perror(message); exit(1); }


/*

* color_example:
*   Maps color strings to integer values, like "red" => 0xff0000.
*
* Because the values (ENTRY data) are pointers, I use casts to and from
* intptr_t to store integers instead of pointers.
*/

void color_example(void) { ENTRY e, *ep; int i;

puts("color_example:");

/* Create an empty table. */ if (hcreate(50) == 0) fail("hcreate");

/* Add keys => values to table. */ { char *keys[] = { "red", "green", "blue" }; intptr_t values[] = { 0xff0000, 0x00ff00, 0x0000ff };

for (i = 0; i < 3; i++) { /* Need strdup() when using ENTER. */ if ((e.key = strdup(keys[i])) == NULL) fail("strdup"); e.data = (void *)values[i];

/* Insert e into table. */ if (hsearch(e, ENTER) == NULL) fail("hsearch"); } }

/* Check if keys exist. */ { char *keys[] = { "blue", "green", "orange", "red" };

for (i = 0; i < 4; i++) { /* Not using ENTER, so no strdup(). */ e.key = keys[i]; ep = hsearch(e, FIND); if (ep) { printf("\t'%s' has value %06" PRIxPTR "\n", ep->key, (intptr_t)ep->data); } else printf("\t'%s' is not in table\n", e.key); } }

/* * Destroy table. On my machine, hdestroy() will free() all the * keys in the table. This is why I had to strdup() my keys. */ hdestroy(); }


/*

* number_example:
*   Maps numbers to strings or numbers, like 2 => 2.71828 or 4 => "four".
*
* First I need a VALUE that can either be a string or a number.
*/

typedef struct value { int v_type;

  1. define T_NUM 0x1 /* use v_num */
  2. define T_STR 0x2 /* use v_str */
  3. define T_DEL 0x4 /* pretend to delete from table */

union { double u_num; char *u_str; } v_union;

  1. define v_num v_union.u_num
  2. define v_str v_union.u_str

struct value *next; /* link all VALUEs in a list */ } VALUE;

void number_example(void) { ENTRY e, *ep; VALUE *all = NULL, *vp; int i, s; char buf[16];

puts("number_example:");

if (hcreate(50) == 0) fail("hcreate");

/* Add numeric values. */ { double keys[] = { 2, 3, 4, 5.6 }; double values[] = { 2.71828, 3.14159, 4.47214, 7.8 };

for (i = 0; i < 4; i++) { /* Must convert keys to strings. */ snprintf(buf, sizeof buf, "%.6g", keys[i]); if ((e.key = strdup(buf)) == NULL) fail("strdup");

/* Allocate a new VALUE. */ if ((vp = calloc(1, sizeof vp[0])) == NULL) fail("calloc"); vp->v_type = T_NUM; vp->v_num = values[i]; e.data = vp;

/* Remember to free it. */ vp->next = all; all = vp;

if (hsearch(e, ENTER) == NULL) fail("hsearch"); } }

/* * Add string values. * * For this example, all of my values will be static string * constants. This removes the need to free(vp->v_str) * when I replace or delete a value. */ { double keys[] = { 4, 8, 10 }; char *values[] = { "four", "eight", "ten" };

for (i = 0; i < 3; i++) { /* Must convert keys to strings. */ snprintf(buf, sizeof buf, "%.6g", keys[i]);

/* * This shows how to add or replace a value * in the table (so I can change an entry * from 4 => 4.47214 to 4 => "four"). */ e.key = buf; ep = hsearch(e, FIND); if (ep) { /* Already have key. Change value. */ vp = (VALUE *)ep->data; vp->v_type = T_STR; vp->v_str = values[i]; } else { /* Need to add key. */ if ((e.key = strdup(buf)) == NULL) fail("strdup");

if ((vp = calloc(1, sizeof vp[0])) == NULL) fail("calloc"); vp->v_type = T_STR; vp->v_str = values[i]; e.data = vp;

/* Remember to free it. */ vp->next = all; all = vp;

if (hsearch(e, ENTER) == NULL) fail("hsearch"); } } }

/* Delete key 8. */ snprintf(buf, sizeof buf, "%.6g", (double)8); e.key = buf; ep = hsearch(e, FIND); if (ep) { vp = (VALUE *)ep->data; vp->v_type = T_DEL; }

/* Check if keys exist. */ { double keys[] = { 2, 3, 4, 5.6, 7, 8, 10 };

for (i = 0; i < 7; i++) { snprintf(buf, sizeof buf, "%.6g", keys[i]); e.key = buf; ep = hsearch(e, FIND);

if (ep == NULL || (vp = (VALUE *)ep->data)->v_type & T_DEL) { printf("\t%s is not in the table\n", buf); } else if (vp->v_type & T_NUM) { printf("\t%s has value %g\n", buf, vp->v_num); } else if (vp->v_type & T_STR) { printf("\t%s has value '%s'\n", buf, vp->v_str); } else { printf("\t%s has invalid value\n", buf); } } }

/* Destroy table and keys. */ hdestroy();

/* Free all values. */ while (all != NULL) { vp = all->next; free(all); all = vp; } }

int main() { color_example(); number_example(); return 0; }</lang>

Output:

color_example:
        'blue' has value 0000ff
        'green' has value 00ff00
        'orange' is not in table
        'red' has value ff0000
number_example:
        2 has value 2.71828
        3 has value 3.14159
        4 has value 'four'
        5.6 has value 7.8
        7 is not in the table
        8 is not in the table
        10 has value 'ten'

BSD dbopen()

BSD provides dbopen() in <db.h>. This is Berkeley DB 1.85. Because dbopen() often puts a database on disk, one easily forgets that dbopen(NULL, ...) can put a small database in memory. When the type is DB_BTREE or DB_HASH, then the database is an associative array.

  • Warning: some GNU/Linux systems have a dbopen(3) manual page without a real dbopen() function. See Debian bug #337581.

The next example translates the previous example to dbopen(). Every key and value is a *void, needs a cast to the correct type. Because BSD also has <err.h>, I remove fail() and use err().

Library: BSD libc
Works with: OpenBSD version 4.8

<lang c>#include <sys/types.h>

  1. include <err.h> /* err() */
  2. include <fcntl.h>
  3. include <limits.h>
  4. include <stdio.h> /* printf(), puts() */
  5. include <string.h> /* memset() */
  6. include <db.h> /* dbopen() */

/*

* color_example:
*   Maps color strings to integer values, like "red" => 0xff0000.
*/

void color_example(void) { DB *db; DBT key, value; int i, r;

puts("color_example:");

/* Create an empty table. */ db = dbopen(NULL, O_RDWR, 0777, DB_HASH, NULL); if (db == NULL) err(1, "dbopen");

/* Add keys => values to table. */ { char *keys[] = { "red", "green", "blue" }; int values[] = { 0xff0000, 0x00ff00, 0x0000ff };

for (i = 0; i < 3; i++) { /* key.size must count the '\0' byte */ key.data = keys[i]; key.size = strlen(keys[i]) + 1; value.data = &values[i]; value.size = sizeof values[i];

/* * Insert key, value into table. DB will * allocate its own storage for key, value. */ if (db->put(db, &key, &value, 0) < 0) err(1, "db->put"); } }

/* Check if keys exist. */ { char *keys[] = { "blue", "green", "orange", "red" };

for (i = 0; i < 4; i++) { key.data = (void *)keys[i]; key.size = strlen(keys[i]) + 1;

r = db->get(db, &key, &value, 0); if (r < 0) err(1, "db->get"); else if (r > 0) { printf("\t'%s' is not in table\n", (char *)key.data); } else { printf("\t'%s' has value %06x\n", (char *)key.data, *(int *)value.data); } } }

/* Destroy table. */ db->close(db); }


/*

* number_example:
*   Maps numbers to strings or numbers, like 2 => 2.71828 or 4 => "four".
*
* First I need a VALUE that can either be a string or a number.
*/

typedef struct value { int v_type;

  1. define T_NUM 0x1 /* use v_num */
  2. define T_STR 0x2 /* use v_str */

union { double u_num; char *u_str; } v_union;

  1. define v_num v_union.u_num
  2. define v_str v_union.u_str

} VALUE;

void number_example(void) { DB *db; DBT key, value; VALUE v, *vp; double d; int i, r;

puts("number_example:");

db = dbopen(NULL, O_RDWR, 0777, DB_HASH, NULL); if (db == NULL) err(1, "dbopen");

/* Add numeric values. */ { double keys[] = { 2, 3, 4, 5.6 }; double values[] = { 2.71828, 3.14159, 4.47214, 7.8 };

for (i = 0; i < 4; i++) { memset(&v, 0, sizeof v); v.v_type = T_NUM; v.v_num = values[i];

key.data = &keys[i]; key.size = sizeof keys[i]; value.data = &v; value.size = sizeof v;

if (db->put(db, &key, &value, 0) < 0) err(1, "db->put"); } }

/* * Add string values. * * For this example, all of my values will be static string * constants. This removes the need to free(vp->v_str) * when I replace or delete a value. */ { double keys[] = { 4, 8, 10 }; char *values[] = { "four", "eight", "ten" };

for (i = 0; i < 3; i++) { memset(&v, 0, sizeof v); v.v_type = T_STR; v.v_str = values[i];

key.data = &keys[i]; key.size = sizeof keys[i]; value.data = &v; value.size = sizeof v;

/* * db->put can replace an entry (so I can change * it from 4 => 4.47214 to 4 => "four"). * * I am storing the strings outside the DB. * To put them inside the DB, I would need to * remove the v.v_str pointer and append each * string data to value.data. */ if (db->put(db, &key, &value, 0) < 0) err(1, "db->put"); } }

/* Delete key 8. */ d = 8; key.data = &d; key.size = sizeof d; r = db->del(db, &key, 0); if (r < 0) err(1, "db->del");

/* Iterate all keys. */ r = db->seq(db, &key, &value, R_FIRST); if (r < 0) err(1, "db->seq"); else if (r > 0) puts("\tno keys!"); else { printf("\tall keys: %g", *(double *)key.data); while ((r = db->seq(db, &key, &value, R_NEXT)) == 0) printf(", %g", *(double *)key.data); if (r < 0) err(1, "db->seq"); puts(""); }

/* Check if keys exist. */ { double keys[] = { 2, 3, 4, 5.6, 7, 8, 10 };

for (i = 0; i < 7; i++) { key.data = &keys[i]; key.size = sizeof keys[i];

r = db->get(db, &key, &value, 0); if (r < 0) err(1, "db->get"); else if (r > 0) { printf("\t%g is not in the table\n", keys[i]); continue; }

vp = (VALUE *)value.data; if (vp->v_type & T_NUM) { printf("\t%g has value %g\n", keys[i], vp->v_num); } else if (vp->v_type & T_STR) { printf("\t%g has value '%s'\n", keys[i], vp->v_str); } else { printf("\t%g has invalid value\n", keys[i]); } } }

db->close(db); }

int main() { color_example(); number_example(); return 0; }</lang>

Output:

color_example:
        'blue' has value 0000ff
        'green' has value 00ff00
        'orange' is not in table
        'red' has value ff0000
number_example:
        all keys: 2, 3, 5.6, 4, 10
        2 has value 2.71828
        3 has value 3.14159
        4 has value 'four'
        5.6 has value 7.8
        7 is not in the table
        8 is not in the table
        10 has value 'ten'

BSD sys/tree.h

Library: BSD libc
Works with: OpenBSD version 4.8

<lang c>#include <sys/tree.h>

  1. include <err.h> /* err() */
  2. include <stdio.h> /* printf(), puts() */
  3. include <stdlib.h> /* calloc(), free() */
  4. include <string.h> /* strcmp() */

/*

* color_example:
*   Maps color strings to integer values, like "red" => 0xff0000.
*
* I will use a red-black tree of type 'struct ctree', which contains
* nodes of type 'struct cnode'.
*/

struct cnode { RB_ENTRY(cnode) entry; char *key; int value; };

int cnodecmp(struct cnode *a, struct cnode *b) { return strcmp(a->key, b->key); }

RB_HEAD(ctree, cnode); RB_GENERATE(ctree, cnode, entry, cnodecmp)

void color_example(void) { struct ctree head = RB_INITIALIZER(&head); /* an empty tree */ struct cnode n, *np, *op; int i;

puts("color_example:");

/* Add keys => values to tree. */ { char *keys[] = { "red", "green", "blue" }; int values[] = { 0xff0000, 0x00ff00, 0x0000ff };

for (i = 0; i < 3; i++) { if ((np = calloc(1, sizeof np[0])) == NULL) err(1, "calloc"); np->key = keys[i]; np->value = values[i]; RB_INSERT(ctree, &head, np); } }

/* Check if keys exist. */ { char *keys[] = { "blue", "green", "orange", "red" };

for (i = 0; i < 4; i++) { n.key = keys[i]; np = RB_FIND(ctree, &head, &n);

if (np) { printf("\t'%s' has value %06x\n", np->key, np->value); } else { printf("\t'%s' is not in tree\n", n.key); } } }

/* Free tree. */ for (np = RB_MIN(ctree, &head); np != NULL; np = op) { op = RB_NEXT(ctree, &head, np); RB_REMOVE(ctree, &head, np); free(np); } }


/*

* number_example:
*   Maps numbers to strings or numbers, like 2 => 2.71828 or 4 => "four".
*
* This node has a value that can either be a string or a number.
*/

struct dnode { RB_ENTRY(dnode) entry; double key; int v_type;

  1. define T_NUM 0x1 /* use v_num */
  2. define T_STR 0x2 /* use v_str */

union { double u_num; char *u_str; } v_union;

  1. define v_num v_union.u_num
  2. define v_str v_union.u_str

};

int dnodecmp(struct dnode *a, struct dnode *b) { double aa = a->key; double bb = b->key; return (aa < bb) ? -1 : (aa > bb); }

RB_HEAD(dtree, dnode); RB_GENERATE(dtree, dnode, entry, dnodecmp)

void number_example(void) { struct dtree head = RB_INITIALIZER(&head); struct dnode n, *np, *op; int i;

puts("number_example:");

/* Add numeric values. */ { double keys[] = { 2, 3, 4, 5.6 }; double values[] = { 2.71828, 3.14159, 4.47214, 7.8 };

for (i = 0; i < 4; i++) { if ((np = calloc(1, sizeof np[0])) == NULL) err(1, "calloc"); np->key = keys[i]; np->v_type = T_NUM; np->v_num = values[i]; RB_INSERT(dtree, &head, np); } }

/* * Add string values. * * For this example, all of my values will be static string * constants. This removes the need to free(np->v_str) * when I replace or delete a value. */ { double keys[] = { 4, 8, 10 }; char *values[] = { "four", "eight", "ten" };

for (i = 0; i < 3; i++) { /* * This shows how to add or replace a value * in the tree (so I can change an entry * from 4 => 4.47214 to 4 => "four"). */ n.key = keys[i]; if (np = RB_FIND(dtree, &head, &n)) { np->v_type = T_STR; np->v_str = values[i]; } else if (np = calloc(1, sizeof np[0])) { np->key = keys[i]; np->v_type = T_STR; np->v_str = values[i]; RB_INSERT(dtree, &head, np); } else err(1, "calloc"); } }

/* Delete key 8. */ n.key = 8; if (np = RB_FIND(dtree, &head, &n)) RB_REMOVE(dtree, &head, np);

/* Iterate all keys. */ i = 1; RB_FOREACH(np, dtree, &head) { if (i) { printf("\tall keys: %g", np->key); i = 0; } else printf(", %g", np->key); } if (i) puts("\tno keys!"); else puts("");

/* Check if keys exist. */ { double keys[] = { 2, 3, 4, 5.6, 7, 8, 10 };

for (i = 0; i < 7; i++) { n.key = keys[i]; np = RB_FIND(dtree, &head, &n);

if (np == NULL) { printf("\t%g is not in the tree\n", keys[i]); } else if (np->v_type & T_NUM) { printf("\t%g has value %g\n", keys[i], np->v_num); } else if (np->v_type & T_STR) { printf("\t%g has value '%s'\n", keys[i], np->v_str); } else { printf("\t%g has invalid value\n", keys[i]); } } }

/* Free tree. */ for (np = RB_MIN(dtree, &head); np != NULL; np = op) { op = RB_NEXT(dtree, &head, np); RB_REMOVE(dtree, &head, np); free(np); } }

int main() { color_example(); number_example(); return 0; }</lang>

Output:

color_example:
        'blue' has value 0000ff
        'green' has value 00ff00
        'orange' is not in tree
        'red' has value ff0000
number_example:
        all keys: 2, 3, 4, 5.6, 10
        2 has value 2.71828
        3 has value 3.14159
        4 has value 'four'
        5.6 has value 7.8
        7 is not in the tree
        8 is not in the tree
        10 has value 'ten'