Exercise 6-4 (Word frequency)

Chapter_6     Exercise_6-3 hash     Exercise_6-5







Exercise 6-4     K&R, p. 143


Exercise 6-4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.




freq.c         download


#include <stdio.h> // for getchar(), putchar(), printf(), EOF, NULL
#include <string.h> // for strcmp(), strlen(), strcpy()
#include <ctype.h> // for isspace(), isalpha(), isalnum()

#define MAXWORD 100 // max word length
#define MAXNOWORDS 1000 // max no of words (with the same frequency)

struct tnode // tree node
{
char *word; // points to the text
int count; // number of occurrences
struct tnode *left; // left child (smaller)
struct tnode *right; // right child (bigger)
};

struct fnode // frequency node
{
int nw; // no of words with the same frequency, size of words[]
char *words[MAXNOWORDS]; // words with the same frequency
int count; // number of occurrences (frequency)
struct fnode *left; // left child (smaller)
struct fnode *right; // right child (bigger)
};

struct tnode * // add node to the tree
addtnode (struct tnode *, char *);

struct fnode * // add tnode to fnode (frequency) tree
addfnode (struct fnode *, struct tnode *);

struct fnode * // build a frequency tree from existing tree
addAllNodes (struct fnode *, struct tnode *);
void printftree(struct fnode *); // start with the root node
int getword(char *, int);

// word frequency count
int main()
{
int c;
struct tnode *root;
char word[MAXWORD];

root = NULL; // initialize

while ((c = getword(word, MAXWORD)) != EOF)
{
if (isalpha(c) || c == '_')
{root = addtnode(root, word);}
}

struct fnode *fr = NULL; // root of the frequency tree

fr = addAllNodes(fr, root);

printftree(fr);

return 0;
}

struct tnode *
nalloc(void); // allocate tree node

// duplicate string:
char * strDup(char *); // strdup() is declared by string.h

struct tnode *
addtnode(struct tnode *p, char *w)
{
int cond;

if (p == NULL) // a new word has arrived
{
p = nalloc(); // make a new node
p->word = strDup(w);
p->count = 1;
p->left = p->right = NULL;
}
else if ((cond = strcmp(w, p->word)) == 0)
{p->count++;} // repeated word
else if (cond < 0) // less than into left subtree
{p->left = addtnode(p->left, w);}
else // greater than into right subtree
{p->right = addtnode(p->right, w);}

return p;
}

struct fnode *
falloc(void); // allocate frequency node

struct fnode *
addfnode(struct fnode *p, struct tnode *t)
{
if (p == NULL) // a new frequency has arrived
{
p = falloc(); // make a new frequency node
p->count = t->count;
p->words[0] = t->word; // no need to allocate space here
p->nw = 1;
p->left = p->right = NULL;
}
else if (p->count == t->count)
{ // repeated frequency, different words
p->words[p->nw++] = t->word; // no need to allocate space
}
else if (p->count < t->count) // greater than into left subtree
{p->left = addfnode(p->left, t);}
else // left than into right subtree
{p->right = addfnode(p->right, t);}

return p;
}

struct fnode * // build a frequency tree from existing tree
addAllNodes (struct fnode *fr, struct tnode *tr)
{
if (tr != NULL)
{ // pre-order traversal of tree (node, left, right)
fr = addfnode(fr, tr);
fr = addAllNodes(fr, tr->left);
fr = addAllNodes(fr, tr->right);
}
}

// in-order print of tree with root p (left, node, right)
void printftree (struct fnode *p)
{
int i;

if (p != NULL)
{
printftree(p->left);

printf("(%d: ", p->count);
for (i = 0; i < p->nw - 1; i++)
{printf("%s,%s", p->words[i], (i % 5 == 4) ? "\n" : " ");}
printf("%s)\n", p->words[i]); // p->words[p->nw - 1]

printftree(p->right);
}
}

#include <stdlib.h> // for malloc()

struct tnode *
nalloc(void) // allocate space for a tree node
{ // make a tnode
return (struct tnode *) malloc (sizeof(struct tnode));
}

struct fnode *
falloc(void) // allocate space for a frequency node
{ // make a fnode
return (struct fnode *) malloc (sizeof(struct fnode));
}

char * strDup(char *s) // make a duplicate of s
{
char *p;
// we assume a char is stored on a byte
p = (char *) malloc (strlen(s) + 1); // +1 for ending '\0'

if (p != NULL)
{strcpy(p, s);}

return p;
}

int getch(void);
void ungetch(int);

// get next word or character from input
int getword(char *word, int lim)
{
int c;
char *w = word;

while (isspace(c = getch()))
{} // skip beginning whitespace

if (c != EOF)
{*w++ = c;}

if (!isalpha(c) && c != '_')
{
*w = '\0';
return c;
}

for ( ; --lim > 0; w++)
{
if (!isalnum(*w = getch()) && *w != '_')
{
ungetch(*w);
break; // out of for(), w++ is not executed
}
}

*w = '\0';
return word[0];
}

// buffer for ungetch():
int buf = EOF-1; // not a real character, not even EOF

int getch(void) // get a (possibly pushed-back) character
{
if (buf < EOF)
{
return getchar();
}

int temp = buf; // buf >= EOF
buf = EOF-1; // reset buf

return temp;
}
// push character back on input (make it available for the next getch()):
void ungetch(int c)
{
buf = c;
}
/*
gcc freq.c -o freq
./freq < freq.c
(48: p)
(35: struct)
(21: fnode)
(19: int, tnode)
(18: left)
(17: right, word)
(16: char, a, c, frequency)
(15: if, w)
(13: for, count, node, tree)
(12: fr, words)
(11: NULL, of, return, void)
(9: EOF, root, the, t)
(8: buf, i, s)
(7: allocate, else, make)
(6: h, addtnode, addfnode, addAllNodes, getch,
nw, printftree, with, to, tr)
(5: include, child, character, freq, printf,
into, malloc, no, new, space,
subtree, than, ungetch)
(4: MAXWORD, _, cond, falloc, from,
getword, isalpha, is, nalloc, not,
same, string, strDup)
(3: MAXNOWORDS, bigger, add, arrived, back,
build, define, existing, duplicate, getchar,
get, greater, has, isspace, isalnum,
input, max, lim, n, need,
next, number, occurrences, order, on,
repeated, smaller, sizeof, strcmp, strcpy,
strlen, temp, while)
(2: ctype, assume, beginning, available, break,
buffer, by, byte, declared, d,
different, ending, executed, even, gcc,
here, in, stdio, initialize, length,
it, main, less, o, points,
or, out, pre, possibly, print,
putchar, pushed, push, real, reset,
size, skip, start, stdlib, stored,
strdup, text, traversal, we, whitespace)
*/









Chapter_6     Exercise_6-3 BACK_TO_TOP hash     Exercise_6-5



Comments

Popular posts from this blog

Contents

Blogger Page Margins in Contempo