Rcjp's Weblog

October 6, 2007

Hash Tables

Filed under: c — rcjp @ 11:21 am

Just a quick note on how we used to use hash tables for the compiler code at Salford:

#include <stdio.h>
#include <stdlib.h>

/* point this at a debugging malloc if required */
#define newm_array(T,N) (T*)malloc(sizeof(T)*(N))
#define HASHSIZE    1024

struct symbol_list {
   symbol_list * next;
   int symbol ;
   char* name ;
   int size ;

symbol_list** base;

int main()
    ... we have some name we want to hash...

    base=newm_array(symbol_list*, HASHSIZE);

    /* clear hash table */
    while(i<HASHSIZE) {

    int ih=0;
    char* p=name;
    while (*p) {       /* compute hash */
    symbol_list* x=base[ih];
    while (x != NULL) x=x->next; /* find empty chain */

    ... set x = name...

though might be better to make some functions

static unsigned hash(char *s)
    unsigned hashval;

    for (hashval = 0; *s != ''; s++)
        hashval = *s + 31*hashval;

    return hashval % HASHSIZE;

static entry *lookup(char *s)
    entry *np;

    for (np = hashtab[hash(s)]; np != null; np = np->next)
        if (wstrcmp(s, np->name) == 0)
            return np;
    return null;

October 1, 2007

Simple diff of 2 files

Filed under: c, lisp, python — rcjp @ 7:59 pm

A few days ago I wanted to compare two files each of which had one word per line (they were completion files for the rlwrap utility incidentally) thats easy enough with the unix shell commands, infact you can do it in one line

diff -iyw --suppress-common-lines <(sort -f file1) <(sort -f file2)

but I thought as a quick programming exercise I’d do it in C++/C, python and Common Lisp…

Calculating the difference is very easy using C++’s set_difference etc. but it is a surprise, I think for most programmers anyway, that you have to write your own case insensitive string comparison function. C++ sure is a peculiar mix of high and low level programming.

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <iterator>
#include <functional>

// not sure there is any advantage to inherit from binary_function here?
// (infact we could just define a function rather than a struct and operator)
struct lessthan_nocase :
  public std::binary_function<const std::string&, const std::string&, bool>
    bool operator()(const std::string& s1, const std::string& s2) const
        std::string::const_iterator p1 = s1.begin();
        std::string::const_iterator p2 = s2.begin();

        while(p1 != s1.end() && p2 != s2.end()) {
            if (toupper(*p1) != toupper(*p2)) return toupper(*p1) < toupper(*p2);
        return s1.size() < s2.size();

// set_difference only work on sorted containers
void sorted_readfile(const char* file, std::vector<std::string>& fvec)
    std::ifstream f(file);
    std::istream_iterator<std::string> finput(f), fend;

    copy(finput, fend, back_inserter(fvec));
    sort(fvec.begin(), fvec.end(), lessthan_nocase());

// dump results (show words if < MAXSHOW)
const unsigned int MAXSHOW = 10;

void display_diff(std::string title, std::vector<std::string> v)
    std::cout << title;
    if (v.size() < MAXSHOW) {
        std::cout << std::endl << std::string(title.size(), '-') << std::endl;
        copy(v.begin(), v.end(),
             std::ostream_iterator<std::string>(std::cout, "\n"));
    } else {
        std::cout << " =  " << v.size() << " words" << std::endl;
    std::cout << std::endl;

int main(int argc, char* argv[])
    if (argc != 3) {
        std::cerr << "Usage: tdiff filename1 filename2" << std::endl;

    std::vector<std::string> f1;
    sorted_readfile(argv[1], f1);

    std::vector<std::string> f2;
    sorted_readfile(argv[2], f2);

    std::vector<std::string> notinf1;
    set_difference(f1.begin(), f1.end(), f2.begin(), f2.end(),
            back_inserter(notinf1), lessthan_nocase());
    display_diff("words in 1st but not in 2nd", notinf1);

    std::vector<std::string> notinf2;
    set_difference(f2.begin(), f2.end(), f1.begin(), f1.end(),
            back_inserter(notinf2), lessthan_nocase());
    display_diff("words in 2nd but not in 1st", notinf2);

    std::vector<std::string> symdiff;
    set_symmetric_difference(f2.begin(), f2.end(), f1.begin(), f1.end(),
            back_inserter(symdiff), lessthan_nocase());
    display_diff("symmetric difference", symdiff);

    std::vector<std::string> inter;
    set_intersection(f2.begin(), f2.end(), f1.begin(), f1.end(),
            back_inserter(inter), lessthan_nocase());
    display_diff("intersection", inter);

    return 0;

In C, the only thing to trip you up is getting the casting of the void * pointers in strcmp_nocase correct before trying to dereference them. Also I’ve cheated and used the non-ansi strcasecmp (sometimes called stricmp).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAXLINE  256
#define MAXWORDS 10000
#define MAXSHOW  10

typedef enum {false, true} bool;

bool strempty(char *str)
    char *p = str;
    while (*p) if (!isspace(*p++)) return false;
    return true;

int strcmp_nocase(const void *s1, const void *s2)
    /* The actual arguments to this function are "pointers to
       pointers to char", but strcmp() arguments are "pointers
       to char", hence the following cast plus dereference */
    return strcasecmp( *(char * const *)s1, * (char * const *) s2);

int sorted_readfile(char *filename, char **words)
    FILE *f = fopen(filename, "r");

    if (!f) {
        fprintf(stderr, "can't open %s\n", filename);

    int count = 0;
    char line[MAXLINE], *q;
    while (fgets(line, MAXLINE, f) && (count < MAXWORDS)) {
        if ((q=strpbrk(line,"\r\n"))) *q=0;     /* words dont include newlines */
        if (strempty(line)) continue;           /* skip blank lines */
        words[count] = (char *) malloc(strlen(line)+1);
        /* or words[count++] = strdup(line)*/
        strcpy(words[count], line);


    if (count == MAXWORDS) {
        fprintf(stderr, "increase MAXWORDS buffer size\n");
    qsort(words, count, sizeof(char *), strcmp_nocase);
    return count;

void display_diff(char *title, char **v, int vsize)
    int i;
    printf("%s", title);

    if (vsize < MAXSHOW) {
        for (i=0; i<strlen(title); i++) putchar('-');
        for (i=0; i<vsize; i++)
            printf("%s\n", v[i]);
    } else {
        printf(" = %d words\n", vsize);

int main(int argc, char *argv[])
    if (argc != 3) {
        fprintf(stderr, "Usage: tdiff filename1 filename2\n");
    int i;
    char **f1words;
    f1words = malloc(MAXWORDS*sizeof(char*));
    int nf1words = sorted_readfile(argv[1], f1words);

    char **f2words;
    f2words = malloc(MAXWORDS*sizeof(char*));
    int nf2words = sorted_readfile(argv[2], f2words);

    char **diff;
    int ndiff = 0;
    diff = malloc(MAXWORDS*sizeof(char*));
    for (i=0; i < nf1words; i++) {
        if (!bsearch(&f1words[i], f2words, nf2words,
                     sizeof(char*), strcmp_nocase)) {
            /* could just point into f1words instead */
            diff[ndiff++] = strdup(f1words[i]);
    display_diff("words in 1st but not in 2nd", diff, ndiff);

    return 0;

In python, most of it is easy, though its perhaps not very pythonesque to cram some things on one line like I’ve done here. Making set case independent requires the __hash__ and __eq__ functions (I think that is all we need in this case) get overridden to use a saved lowercase version of the supplied string (see the python reference) .

import string

class NoCaseStr(str):
    def __init__(self, s):
        str.__init__(self, s)
        # keep a copy of the lower case string
        self.loweredstr = s.lower()

    def __eq__(self, s):
        return self.loweredstr == s.lower()

    def __hash__(self):
        return hash(self.loweredstr)

def display_diff(title, dset, Maxshow=10):
    if len(dset) < Maxshow:
        print title
        print len(title) * '-'
        print '\n'.join(dset)
        print title, '=', len(dset)

def read_words(f):
    w = set(NoCaseStr(string.strip(word)) for word in open(f).readlines())
    w.discard('')# slightly clumsy... '  \n' gets stripped to '' so discard it
    return w

def tdiff(f1, f2):
    w1 = read_words(f1)
    w2 = read_words(f2)
    display_diff('in 1st but not in 2nd', w1.difference(w2))
    display_diff('in 2nd but not in 1st', w2.difference(w1))
    display_diff('symmetric difference',  w1.symmetric_difference(w2))
    display_diff('intersection',          w1.intersection(w2))

if __name__ == '__main__':
    import sys
    if len(sys.argv) < 3:
        print 'Usage: %s file1 file2' % sys.argv[0]

    tdiff(sys.argv[1], sys.argv[2])

My favourite language – Common Lisp has everything you need to do with without overriding anything. string-equal is case insensitive (as opposed to string=)

(defun read-words (file)
  (with-open-file (str file)
    (loop for line = (read-line str nil nil)
       for word = (string-trim " " line)
       while line
       unless (string= word "")
       collect word)))

(defun display-diff (title diff &optional (max-show 10))
  (format t "~A: ~V{~A~^ ~}~%" title max-show diff))

(defun tdiff (f1 f2)
  (let ((w1 (read-words f1))
        (w2 (read-words f2)))
    (display-diff "in 1st but not in 2nd" (set-difference w1 w2 :test #'string-equal))
    (display-diff "in 2nd but not in 1st" (set-difference w2 w1 :test #'string-equal))
    (display-diff "intersection" (intersection w1 w2 :test #'string-equal))))

(tdiff "/home/r/tmp/f1" "/home/r/tmp/f2")

June 8, 2007

gtk containers

Filed under: c — rcjp @ 7:05 pm

The gtk glib library includes some standard container like linked lists…

/* Compile with cc doubly.c  `pkg-config --cflags --libs gtk+-2.0`  */

#include <glib.h>
#include <string.h>
#include <stdio.h>

void myfunc(gpointer data, gpointer user_data)
    printf("value %s\n", (char *)data);

int main()
    GList *list = NULL;
    GList *it = NULL;

    list = g_list_append(list, "one");
    list = g_list_append(list, "two");
    list = g_list_append(list, "three");

    for (it = list; it != NULL; it = it->next)
        printf("looping - %s\n", (char *)it->data);

    /* or alternatively  */

    g_list_foreach(list, (GFunc) myfunc, NULL);

    printf("length of list is %d\n", g_list_length(list ));
    return 0;

May 10, 2007

Sign Extending Unsigned Numbers

Filed under: c — rcjp @ 2:26 pm

Just playing about really, but I thought it would be worth re-affirming in my brain that you have to be careful using assembler instructions like CLW, CLQ etc. when dealing with unsigned numbers since they propagate the sign bit.:

    #include <stdio.h>
    void extend(unsigned char x)
        int y;
        asm ("xor %%eax, %%eax;\n\t"      /* clear EAX */
             "movb %1, %%al;\n\t"         /* move the byte x into AL */
             "movl %%eax, %0;\n\t"
             :"=r"(y)                     /* output only */
             :"r"(x)                      /* input */
             :"%eax");                    /* clobbered regs */

        printf("x=%d, cbw extended=%d\n", x, y);

    int main(void)

        return 0;


    ~/c/tmp> ./a.out
    x=127, cbw extended=127
    x=128, cbw extended=65408

there is no binary flag for printf in C, but in Common Lisp:

    CL-USER> (format t "~20B~%" 128)
    CL-USER> (format t "~20B~%" 65408)

Instead of doing it in inline C, I thought I’d try nasm + gcc

            extern  printf

            SECTION .data
    fmt:    db "cbw %d (previously %d)", 10, 0 

            SECTION .text
            global main        ; needed for gcc 
            push    ebp
            mov     ebp,esp

            xor     eax,eax
            mov     ax, 128
            push    eax
            push    eax
            push    dword fmt
            call    printf

            add     esp, 12    ; pop 3 pushes
            mov     esp, ebp   ; clean stack frame
            pop     ebp
            mov     eax,0      ; no error return

building and running with

    ~/c/tmp> nasm -f elf nasm2.asm
    ~/c/tmp> gcc -o nasm2 nasm2.o
    ~/c/tmp> nasm2
    cbw 65408 (previously 128)

April 27, 2007

Finding open oflag Settings

Filed under: c, python — rcjp @ 11:03 am

In tracings you see the numerical value for flags like the oflag settings to the C open commands e.g.

    5901 open("/proc/asound/cards", 32768, 0666) = 7

to find out what they mean I just quickly did:

    In [36]: def oflag_to_string(oflag):
       ....:     for c in dir(os):
       ....:         if c.startswith('O_'):
       ....:             if eval('os.'+c) & oflag:
       ....:                 print c

    In [37]: oflag_to_string(32768)

but afterwards realised I could have just done an strace since that nicely converts the values for you.

January 15, 2007

Swapping Spaces for Punctuation

Filed under: c, lisp, python — rcjp @ 10:17 am

Just a quicky. In Common Lisp…

    (defun spacify-punc (string)
      (substitute-if #\Space #'punc-p string))

    (defun punc-p (char) (find char "*,.:;-()"))

then use it…

    CL-USER> (spacify-punc "this, string. and: more")
    "this  string  and  more"

in python…

    def spacify_punc(char):
        if char not in "*,.:;-()":
            return char
            return ' '

    print ''.join(spacify_punc(c) for c in "this, string. and: more")

in C possibly something like…

    #include <stdio.h>
    #include <string.h>

    int main()
      char words[] = "this, string. and: more";
      char *answer = strdup(words);
      unsigned int i;

      /* using sizeof (instead of strlen) so we include the NULL */
      for (i=0; i < sizeof(words); i++)
        answer[i] = strchr("*,.:;-()", words[i]) ? ' ': words[i];

      printf("'%s' becomes '%s'\n", words, answer);
      return 0;

All three languages have functions to detect punctuation, but we are only looking for a subset in this case.

I think I like the CL solution best but the order of args to substitute-if always throws me, thank heavens for SLIME’s argument prompting.

January 6, 2007

Twenty Questions Game in C

Filed under: c — rcjp @ 3:33 pm

Tree traversal is really really easy in Common Lisp so for a bit more of a challenge I wrote this in C. The game is for the user to think of an object and answer questions while the program tries to guess. The code is just a binary search tree, but you have to look ahead to see if you need to add in a piece of tree rather than just a straighforward leaf node.

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>
    #include <stdlib.h>

    #define LINE_MAX 512

    typedef struct node NODE;
    struct node {
        char qu[LINE_MAX];
        NODE *yes, *no;

     * print message and read stdin into LINE_MAX reply buffer
     * trimming newline
    void getreply(char *message, char reply[LINE_MAX])
        fgets(reply, LINE_MAX, stdin);
        strtok(reply, "\n");        /* trim off \n */

    bool ask(char *question, bool object)
        char line[LINE_MAX], reply[LINE_MAX], qu[LINE_MAX];

        if (object)
            sprintf(qu, "Is it %s?", question);
            strcpy(qu, question);
        getreply(qu, line);
        sscanf(line, "%s", reply);
        if (!strcmp(reply, "yes") || !strcmp(reply, "y"))
            return true;
        return false;

    int main()
        NODE info = { "Is it an animal?",
            &(NODE) {"Is it slimy?",
                     &(NODE) {"a fish", NULL, NULL},
                     &(NODE) {"a bear", NULL, NULL}},

        char object[LINE_MAX], clarify[LINE_MAX];
        bool yesno;
        do {
            NODE *curr = &info, *next, *newnode;
            while (curr) {
                yesno = ask(curr->qu, false);
                next = yesno ? curr->yes : curr->no;
                if (!next) {
                     * we need to add a leaf 
                    curr->no = malloc(sizeof(NODE));
                    getreply("What is it? ", curr->no->qu);
                if (!next->yes && !next->no) {
                     * we might need to add a new node in here 
                    if (ask(next->qu, true))
                        break;      /* the leaf was right.. new game */

                    newnode = malloc(sizeof(NODE));
                    getreply("What is it? ", object);
                    sprintf(clarify, "What question distiguishes %s from %s?",
                            object, next->qu);
                    getreply(clarify, newnode->qu);

                    /* check which way round this new question applies */
                    sprintf(clarify, "A %s, %s", object, newnode->qu);
                    if (ask(clarify, false)) {
                        newnode->yes = malloc(sizeof(NODE));
                        strcpy(newnode->yes->qu, object);
                        newnode->no = next;
                    } else {
                        newnode->no = malloc(sizeof(NODE));
                        strcpy(newnode->no->qu, object);
                        newnode->yes = next;
                    if (yesno)
                        curr->yes = newnode;
                        curr->no = newnode;
                curr = next;
        while (ask("New Game", false));
        return 0;

    /*   Sample run...

        Is it an animal?
        Is it slimy?
        Is it a bear?
        New Game

        Is it an animal?
        What is it?
        a box
        New Game

        Is it an animal?
        Is it a box?
        What is it?
        a banana
        What question distiguishes a banana from a box?
        is it yellow?
        A a banana, is it yellow?
        New Game

        Is it an animal?
        is it yellow?
        Is it a banana?
        What is it?
        the sun
        What question distiguishes the sun from a banana?
        is it cold?
        A the sun, is it cold?
        New Game

        Is it an animal?
        is it yellow?
        is it cold?
        Is it the sun?
        New Game

I started thinking about how this would look in a C++ object type program but, to be honest, I don’t enjoy trying to think out the right class structure; and what is annoying is that you can’t start coding and explore the problem as you code, because you’ll come to a grinding halt if you’ve got the structure wrong. I guess I just don’t think OOP is fun coding, at least not in C++.

December 30, 2006

Lyaponov Exponent

Filed under: c — rcjp @ 6:42 pm

Most introductions to chaos theory will talk about the logistic mapping x_{n+1}=r\, x_n(1-x_n) and then calculate the lyapunov exponent which is a measure of how rapidly small deviations in the initial conditions give exponential changes in results giving an indication how chaotic the system is. It is calculated from \lambda = \frac{1}{N} \sum^N_{n=1} \log(\frac{d x_{n+1}}{d x_n}) The program below calculates this for 2.95<r<4.0 after iterating for 1000 times to stabalise things then using N=30.

The code is slightly contrived; I wrote it to play with passing functions through C++ templates. The idea being that logis and deriv (its derivative) since they are templated, can get inlined by the compiler; though Id’ guess a much more straightforward C program would probably optimise just as well. In addition the std::bind1st(std::mem_fun(&IterMap::lyaponov_func),this) didn’t exactly trip off my fingers.

#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#include <numeric>
#include <functional>
#include <cmath>

typedef double (*CALC)(double, double);

class IterMap
    std::vector<double> data;
    const int nstabalise;
    const int nrecord;
    double r;
    IterMap(int rec, int stab=1000) : nstabalise(stab), nrecord(rec) {}
    template<CALC derivfunc>double lyaponov_func(double x);
    template<CALC mapfunc> void stabalise(double x);
    template<CALC mapfunc, CALC derivfunc> double lyapunov_exp(double r);

template<CALC mapfunc> void IterMap::stabalise(double x)
    for(int i = 0; i < nstabalise; ++i)
        x = mapfunc(r, x);

    for(int i = 0; i < nrecord; ++i) {
        x = mapfunc(r, x);

template<CALC derivfunc> double IterMap::lyaponov_func(double x)
    return log(fabs( derivfunc(r,x) ));

template<CALC mapfunc, CALC derivfunc> double IterMap::lyapunov_exp(double ra)
    r = ra;

    // used mem_func, else have to declare func static

    transform(data.begin(), data.end(), data.begin(),
    double avg = accumulate(data.begin(), data.end(), 0.0) / data.size();
    return avg;


double logis(double r, double x)
    return r * x * (1.0 - x);

double deriv(double r, double x)
    return r - 2.0*r*x;

int main(int argc, char* argv[])
    std::ofstream out("ldata", std::ios::out);
    IterMap bir(30);
    for (double r = 2.95; r <= 4.0; r+=0.0005)
        out << r << " " << bir.lyapunov_exp<logis,deriv>(r) << std::endl;

    return 0;

December 29, 2006

Quick C++ Word Count Trick

Filed under: c — rcjp @ 2:19 pm

Probably not really useful in actual code but

#include <iostream>
#include <iterator>
#include <string>

typedef std::istream_iterator<std::string> in;

int main()
    std::cout << std::distance(in(std::cin), in());

will print the number of words piped in.

December 22, 2006

String Searching/Replacing

Filed under: c, lisp, python — rcjp @ 9:04 am

just some quick notes on how various languages handle replacing/splicing a string…

In C++ chopping, searching and replacing in a string is fairly easy

    #include <iostream>
    #include <string>

    int main()
        std::string s = "some one with more than one that ones.";

        s.replace(s.find("one"), 3, "three");

        std::cout << s << std::endl;

in straight C doing the same thing is a bit more fiddly

    #include <stdio.h>
    #include <string.h>

    int main()
        char* s = "some one with more than one that ones.";

        char buf[255];
        strcpy(buf, s+3);  /* chop off the first 3 chars */

        char *p = strstr(buf, "one");
        if (p)
            char tmp[255];
            *p = (char) 0;
            strcpy(tmp, buf);
            strcat(tmp, "three");
            strcat(tmp, p+3);  /* skip over length of "one" */
            strcpy(buf, tmp);

        printf("%s\n", buf);
        return 0;

you have to think about the size of the temporary buffer unless you malloc something based on the size of s – but then you have to know how much bigger your operations will make the string.

Perhaps suprisingly there isn’t any standard function to replace strings in Common Lisp – I guess its one of those things your are supposed to deal with yourself since if you know you your replacement word is the same size you can destructively alter the string (using setf on the subseq), otherwise you have to build a new string (since you may be replacing a word with a bigger word). So CL leaves things to you to figure out the best approach.

You can get the position of a string within another with

    * (search "one" "there one is one more than ones")

and then build up the string a la C…

    (let* ((str "some one with more than one that ones.")
           (word "one")
           (p (search word str)))
      (if p
          (concatenate 'string (subseq str 3 p)
                               (subseq str (+ p (length word))))

In python

    In [3]: "there one is one more than ones".replace("one", "three", 1)
    Out[3]: 'there three is one more than ones'

where we are using a count=1 otherwise it would replace all instances. We can even chop off the first three chars and then replace all in one go

    In [5]: "there one is one more than ones"[3:].replace("one", "three")
    Out[5]: 're three is three more than threes'

Replacing all occurances in C++ is a bit more work

    std::string::size_type n;  // or   size_t n; 
    std::string word = "one";
    while((n=s.find(word)) != std::string::npos)
        s.replace(n, word.size(), "three");

Replacing all strings in C or Common Lisp is quite alot more work and probably better to write a utility function and keep it somewhere or use a library.
http://en.wikibooks.org/wiki/Programming:Common_Lisp/Strings has

    (defun replace-all (string part replacement &key (test #'char=))
      (with-output-to-string (out)
        (loop with part-length = (length part)
           for old-pos = 0 then (+ pos part-length)
           for pos = (search part string :start2 old-pos :test test)
             (write-string string out :start old-pos
                                      :end (or pos (length string)))
           when pos do
             (write-string replacement out)
           while pos)))

or maybe use cl-ppcre

Older Posts »

Create a free website or blog at WordPress.com.