Exercise 5-17 (lines - Field-handling sort, all options)

Chapter_5     Exercise_5-16 Index_program     Exercise_5-18







Exercise 5-17     K&R, p. 121


Exercise 5-17. Add a field-handling capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. (The index for this book was sorted with -df for the index category and -n for the page numbers.)




CONTENTS:     unsorted.txt     lines.c




unsorted.txt     K&R, p. 263-271 (Index)         download


call by value 95
command-line arguments 116
<< left shift operator 206
a.out 6
< less than operator 41
alloc function 101
a.out 70
<< left shift operator 49
call by reference 27
command-line arguments 118
<ctype.h> header 248
temperature conversion program 12
command-line arguments 114
<= less or equal operator 206
increment operator, ++ 203
temperature conversion program 9
default label 222
definition, function 69
pointer to function 201
#define, multi-line 89
call by value 202
++ increment operator 18
command-line arguments 115
call by value 27
\t tab character 193
++ increment operator 46
UNIX file system 169
++ increment operator 203
definition, function 25
< less than operator 206
#define with arguments 89
<ctype.h> header 43
increment operator, ++ 106
pointer, null 102
temperature conversion program 13
definition, function 225
pointer to function 118
increment operator, ++ 18
\t tab character 11
default label 58
increment operator, ++ 46
less than operator, < 206
pointer initialization 102
left shift operator, << 206
temperature conversion program 8
less than operator, < 41
pointer initialization 138
command-line arguments 117
++ increment operator 106
pointer to function 147
pointer, null 198
\t tab character 38
temperature conversion program 15
UNIX file system 179
\t tab character 8
<= less or equal operator 41
left shift operator, << 49











lines.c         download


#include <stdio.h> // for getchar(). printf(), EOF, NULL, FILE, fopen(), fclose()
#include <string.h> // for strcat(), strcpy(), strcmp()
#include <ctype.h> // for isdigit(), isalpha(), isupper(), islower(), isalnum()

#define MAXLINES 5000 // max no of lines to be sorted
#define MAXLEN 1000 // max line length
#define MAXFNLEN 100 // max file name length

void initField(int *field, int c, char **s);
void printUsage(void);

int readlines (char *lineptr[], int nlines);
void writelines (char *lineptr[], int nlines, char *filename);

int strfcmp(char*, char*); // case insensitive (fold upper and lowercase)
int strdcmp(char*, char*); // directory order comparison
int strfdcmp(char*, char*); // case insensitive, directory order
int numcmp(char*, char*); // numeric comparison

void qSort1(void *lineptr[], int left, int right, // qsort() is declared in stdlib.h
int (*comp)(void*, void*)); // comparison function,
// strcmp(), strfcmp(), strdcmp(), strfdcmp(), or numcmp()
void qSort2(void *lineptr[], int left, int right, int *fields,
int (*lexComp)(void*, void*), int (*numComp)(void*, void*));
// lexComp() can be strcmp(), strfcmp(), strdcmp(), or strfdcmp()
// numComp() can be numcmp() or NULL
// first do lexicographic sort, then numeric sort on 'equal' lex compared lines
// if (strcmp(line1) == strcmp(line2)), perform numeric sort on a field
int main(int argc, char *argv[])
{ // sorting can be generalized to n fields
int fields[] = {0, 0}; // fields has size 2 (lexicographic and numeric)
// lexicographic sort: strcmp(), strfcmp(), strdcmp(), strfdcmp()
// fields within each line, delimited by ' ' and '\t',
// null fields means regular sort
int orders[3] = {0}; // alternative initialization
// orders: fold (case insensitive), directory, numeric

int c;

while (--argc > 0 && (*++argv)[0] == '-') // optional arguments
{
while (c = *++argv[0]) // -f -d1 -n-1, -f+0 -d -n-1, -fd1, -n2, etc.
{
switch(c)
{
case 'f' :
orders[0] = 1; // case insensitive (fold) order
initField(fields, c, argv); // &fields[0]
break;
case 'd' :
orders[1] = 1; // directory order
initField(fields, c, argv); // &fields[0]
break;
case 'n' :
orders[2] = 1; // numeric order
initField(fields+1, c, argv); // &fields[1]
break;
default:
printf("Illegal option: '%c'\n", c);
printUsage();
return 1; // end program, signalling error
}
}
}

if (argc) // if (argc > 0)
{
printUsage();
return 1; // end program, signalling error
}

if (orders[2] && !fields[1] && // numeric for the entire line
(orders[0] || orders[1]) && !fields[0]) // fold or dir or both for the entire line
{
printf("\"-f\", \"-d\" work together, but not with \"-n\"\n");
printUsage();
return 1; // end program, signalling error
}

if (fields[1] && (fields[0] == fields[1])) // fields[0] == fields[1] > 0
{ // fields[1] > 0 implies orders[2] >0,
// fields[0] > 0 implies (orders[0] || orders[1]) > 0
printf("\"-f\", \"-d\" cannot overlap with \"-n\" over the same field\n");
printUsage();
return 1; // end program, signalling error
}

char *lineptr[MAXLINES];
int nlines; // no of input lines read

char filename[MAXFNLEN];
int i = 0;
if (orders[0]) // case insensitive sort
{filename[i++] = 'f';}
if (orders[1]) // directory order sort
{filename[i++] = 'd';}
if (orders[2]) // numeric sort
{filename[i++] = 'n';}
filename[i] = '\0';
strcat(filename, "sort.txt");

int (*comp)(void*, void*);
int (*lexComp)(void*, void*), (*numComp)(void*, void*) = NULL;

if ((nlines = readlines(lineptr, MAXLINES)) >= 0)
{
if (fields[0] == 0 && fields[1] == 0) // regular sort:
{ // lexicographic or numeric sort for the entire line
if (orders[0] && orders[1]) // case insensitive, directory order
{comp = (int (*)(void*, void*))strfdcmp;}
else if (orders[0]) // case insensitive (fold)
{comp = (int (*)(void*, void*))strfcmp;}
else if (orders[1]) // directory order
{comp = (int (*)(void*, void*))strdcmp;}
else if (orders[2]) // numeric order
{comp = (int (*)(void*, void*))numcmp;}
else {comp = (int (*)(void*, void*))strcmp;}

qSort1((void**)lineptr, 0, nlines-1, comp);
}
else
{
if (orders[0] && orders[1]) // case insensitive, directory order
{lexComp = (int (*)(void*, void*))strfdcmp;}
else if (orders[0]) // case insensitive (fold)
{lexComp = (int (*)(void*, void*))strfcmp;}
else if (orders[1]) // directory order
{lexComp = (int (*)(void*, void*))strdcmp;}
else {lexComp = (int (*)(void*, void*))strcmp;}

if (orders[2]) // numeric order
{numComp = (int (*)(void*, void*))numcmp;}

qSort2((void**)lineptr, 0, nlines-1, fields, lexComp, numComp);
}

writelines(lineptr, nlines, filename);
}
else
{
printf("Error: input too big to sort\n");
return 1; // exit program, signalling error
}

return 0;
}

void initField(int *field, int c, char **s)
{
c = *++s[0];

if (c == '\0')
{
--s[0]; // ungetch()
return;
}
if (c != '+' && c != '-' && !isdigit(c))
{
--s[0]; // ungetch()
return;
}

int sign = 1;
if (c == '-') {sign = -1; c = *++s[0];}
else if (c == '+') {c = *++s[0];} // move past sign

if (!isdigit(c))
{
--s[0]; // ungetch()
return;
}

*field = 0; // *field may have been set before

while (isdigit(c))
{
*field = *field * 10 + (c - '0');
c = *++s[0];
}
*field *= sign;
--s[0]; // ungetch()
}

void printUsage(void)
{
printf("Usage : ./lines [-f field1] [-d field1] [-n field2]\n");
printf("Examples: ./lines -f+0 -d -n-1, ./lines -fd1 -n2\n");
printf("Optional arguments (field1 != field2):\n");
printf("\"-f\" - case insensitive sort (fold upper and lowercase)\n");
printf("\"-d\" - directory order sort (letters, numbers, blanks)\n");
printf("\"-n\" - numeric sort for lines or numbers within lines\n");
printf("field - integer, sort by a field (word) within each line\n");
printf("field > 0 - from the beginning of each line\n");
printf("field < 0 - from the end of each line\n");
printf("field = 0 or missing - sort (the rest of) each line\n");
}

int getLine(char *, int); // getline() is declared in stdio.h
char* alloc(int); // allocate storage

int readlines (char *lineptr[], int maxlines) // read input lines
{
int len; // length of line
int nlines; // no of input lines read

char *p, line[MAXLEN];

nlines = 0;
while ((len = getLine(line, MAXLEN)) > 0)
{
if(nlines >= maxlines || (p = alloc(len+1)) == NULL)
{return -1;} // len+1 for ending '\0'
else
{
strcpy(p, line);
lineptr[nlines++] = p;
}
}

return nlines;
}

int getLine(char *s, int lim)
{
int c = EOF; // initialize
char *p;
// getchar() is only executed if ((p-s) < (lim-1)):
for (p = s; (p-s) < (lim-1) && (c = getchar()) != EOF && c != '\n'; p++)
{ // from 0 to lim-2; s[lim-2]='\n', s[lim-1]='\0'
*p = c;
}
if (c == '\n')
{*p++ = c;}

*p = '\0'; // the null character ends a string

return p-s; // max(p-s) == (lim-1)
}

#define ALLOCSIZE (MAXLINES * MAXLEN)

static char allocbuf[ALLOCSIZE]; // storage for alloc()
static char *allocp = allocbuf; // next free position

char* alloc(int n) // return pointer to n chars
{
if (allocbuf + ALLOCSIZE - allocp >= n) // it fits
{
allocp += n;
return allocp-n; // old pointer
}
else {return 0;} // null pointer
}
void afree(char *p) // free storage pointed to by p
{
if (p >= allocbuf && p < allocbuf + ALLOCSIZE)
{allocp = p;}
}

void writelines (char *lineptr[], int nlines, char *filename) // write output lines
{
FILE *f = fopen(filename, "w"); // open for writing

while (nlines-- > 0)
{fprintf(f, "%s", *lineptr++);}

fclose(f);
}

void swap(void *v[], int i, int j); // interchange v[i], v[j]

// sort v[left] ... v[right] in increasing order
void qSort1(void *v[], int left, int right, int (*comp)(void*, void*))
{// qsort() is declared in stdlib.h
int i, last;

if (left >= right) // do nothing if array contains fewer than 2 elements
{return;} // stopping condition for recursion

swap(v, left, (left + right) / 2); // move partition element to v[left]
last = left; // start moving elements to last position

for (i = left + 1; i <= right; i++)
{
if (comp(v[i], v[left]) < 0) // v[i] < v[left]
{swap(v, ++last, i);} // smaller to the left, bigger to the right
}

swap(v, left, last); // return partition element to the middle position
qSort1(v, left, last-1, comp); // sort smaller elements
qSort1(v, last+1, right, comp); // sort bigger elements
}

// extract substrings from str based on fields
void subStr(char *str, char *substr1, char *substr2, int *fields);

// sort v[left] ... v[right] in increasing order
void qSort2(void *v[], int left, int right, int *fields,
int (*lexComp)(void*, void*), int (*numComp)(void*, void*))
{ // lexComp() can be strcmp(), strfcmp(), strdcmp(), or strfdcmp()
int i, last; // numComp() can be numcmp() or NULL

if (left >= right) // do nothing if array contains fewer than 2 elements
{return;} // stopping condition for recursion

swap(v, left, (left + right) / 2); // move partition element to v[left]
last = left; // start moving elements to last position

int c; // comparison result
char sub1[MAXLEN], sub2[MAXLEN]; // allocate space for substrings
char subL1[MAXLEN], subL2[MAXLEN]; // substrings of v[i], v[left]

for (i = left + 1; i <= right; i++)
{
subStr(v[i], sub1, sub2, fields);
subStr(v[left], subL1, subL2, fields);

c = lexComp(sub1, subL1);
if (c < 0) // sub1 < subL1
{swap(v, ++last, i);} // smaller to the left, bigger to the right
else if (c == 0 && numComp != NULL) // sub1 == subL1, numcmp
{
c = numComp(sub2, subL2); // numcmp(sub2, subL2)
if (c < 0) // sub2 < subL2
{swap(v, ++last, i);} // smaller to the left, bigger to the right
}
}

swap(v, left, last); // return partition element to the middle position
qSort2(v, left, last-1, fields, lexComp, numComp); // sort smaller elements
qSort2(v, last+1, right, fields, lexComp, numComp); // sort bigger elements
}

void swap(void *v[], int i, int j) // interchange v[i], v[j]
{
void *temp;

temp = v[i]; // swap pointers
v[i] = v[j];
v[j] = temp;
}

#include <stdlib.h> // for abs(), atof()

// extract substrings from str based on fields
void subStr(char *str, char *substr1, char *substr2, int *fields)
{ // fields[0] != fields[1] >= 0, words separated by ' ', '\t'
char copy[MAXLEN]; // used by strtok()
strcpy(copy, str); // save str, strtok() destroys copy[]
char sep[] = " \t"; // {' ', '\t', '\0'} // separators
char *token = strtok(copy, sep);

if (token == NULL) // empty line
{
substr1[0] = '\0';
substr2[0] = '\0';
return;
}
// here token != NULL
char *tokens[MAXLEN];
int i = 0;

while (token != NULL)
{
tokens[i++] = token;
token = strtok(NULL, sep);
}
// i holds the length of tokens[] (no of tokens)
if (abs(fields[0]) > i) // field[0] greater than no of words
{
substr1[0] = '\0';
if (abs(fields[1]) > i) // field[1] greater than no of words
{
substr2[0] = '\0';
return;
} // here abs(fields[1]) <= i
if (fields[1] == 0) // hold the (rest of the) line
{
strcpy(substr2, str);
} // here fields[1] != 0
else if (fields[1] > 0)
{strcpy(substr2, tokens[fields[1]-1]);}
else // fields[1] < 0
{strcpy(substr2, tokens[i+fields[1]]);} // i-abs(fields[1])
return;
} // here abs(fields[0]) <= i
if (abs(fields[1]) > i) // field[1] greater than no of words
{
substr2[0] = '\0';
if (fields[0] == 0) // hold the (rest of the) line
{
strcpy(substr1, str);
} // here fields[0] != 0
else if (fields[0] > 0)
{strcpy(substr1, tokens[fields[0]-1]);}
else // fields[0] < 0
{strcpy(substr1, tokens[i+fields[0]]);} // i-abs(fields[0])
return;
}
// here abs(fields[0]) <= i, abs(fields[1]) <= i

int j, index;

if (fields[0] == 0) // fields[1] != 0
{ // numeric sort for one part, lexicographic for the rest of string
if (fields[1] > 0)
{index = fields[1]-1;}
else // fields[1] < 0
{index = i+fields[1];} // i-abs(fields[1])
strcpy(substr2, tokens[index]);

substr1[0] = '\0';
for (j = 0; j < i; j++)
{
if (j != index)
{
strcat(substr1, tokens[j]); // tokens do not contain delimiters ' ', '\t'
if (j < i-1) {strcat(substr1," ");} // space necessary for directory order
}
}
}
else if (fields[1] == 0) // fields[0] != 0
{ // lexicographic sort for one part, numeric for the rest of string
if (fields[0] > 0)
{index = fields[0]-1;}
else // fields[0] < 0
{index = i+fields[0];} // i-abs(fields[0])
strcpy(substr1, tokens[index]);

substr2[0] = '\0';
for (j = 0; j < i; j++)
{
if (j != index)
{
strcat(substr2, tokens[j]); // tokens do not contain delimiters ' ', '\t'
if (j < i-1) {strcat(substr2," ");} // space necessary for directory order
}
}
}
else // (fields[0] != 0) != (fields[1] != 0)
{ // lexicographic and numeric sort each for one part of the string
if (fields[0] > 0)
{strcpy(substr1, tokens[fields[0]-1]);}
else // fields[0] < 0
{strcpy(substr1, tokens[i+fields[0]]);} // i-abs(fields[0])

if (fields[1] > 0)
{strcpy(substr2, tokens[fields[1]-1]);}
else // fields[1] < 0
{strcpy(substr2, tokens[i+fields[1]]);} // i-abs(fields[1])
}
}

int strfcmp(char *s1, char *s2) // case insensitive comparison
{ // (fold uppercase and lowercase together)
int i1, i2;

for ( ; *s1 && *s2; s1++, s2++) // (*s1 != '\0') && (*s2 != '\0')
{
if (*s1 != *s2)
{
if (isalpha(*s1) && isalpha(*s2))
{
if (isupper(*s1) && isupper(*s2) || islower(*s1) && islower(*s2))
{return *s1 - *s2;}
i1 = *s1 - (isupper(*s1) ? 'A' : 'a'); // here one of *s1, *s2 is upper,
i2 = *s2 - (isupper(*s2) ? 'A' : 'a'); // the other is lower
if (i1 != i2) {return i1 - i2;} // else continue;
}
else {return *s1 - *s2;}
} // else continue;
}

return *s1 - *s2; // *s1 == '\0', *s2 == '\0', or both
}

int strdcmp(char *s1, char *s2) // directory order comparison
{
for ( ; *s1 && *s2; s1++, s2++) // (*s1 != '\0') && (*s2 != '\0')
{ // compare only letters, numbers, blanks
while (*s1 && *s1 != ' ' && !isalnum(*s1)) {s1++;}
while (*s2 && *s2 != ' ' && !isalnum(*s2)) {s2++;}

if (*s1 != *s2)
{ // space is ASCII 32 < digits, letters (alphanumeric)
return *s1 - *s2;
} // else continue;
}

return *s1 - *s2; // *s1 == '\0', *s2 == '\0', or both
}

int strfdcmp(char *s1, char *s2) // case insensitive, directory order
{
int i1, i2;

for ( ; *s1 && *s2; s1++, s2++) // (*s1 != '\0') && (*s2 != '\0')
{ // compare only letters, numbers, blanks
while (*s1 && *s1 != ' ' && !isalnum(*s1)) {s1++;}
while (*s2 && *s2 != ' ' && !isalnum(*s2)) {s2++;}

if (*s1 != *s2)
{ // space is ASCII 32 < digits, letters (alphanumeric)
if (!isalpha(*s1) || !isalpha(*s2) ||
isupper(*s1) && isupper(*s2) || islower(*s1) && islower(*s2))
{return *s1 - *s2;}
i1 = *s1 - (isupper(*s1) ? 'A' : 'a'); // here one of *s1, *s2 is upper,
i2 = *s2 - (isupper(*s2) ? 'A' : 'a'); // the other is lower
if (i1 != i2) {return i1 - i2;} // else continue;
} // else continue;
}

return *s1 - *s2; // *s1 == '\0', *s2 == '\0', or both
}

int numcmp(char *s1, char *s2) // numeric comparison
{
double v1, v2;

v1 = atof(s1);
v2 = atof(s2);

if (v1 < v2) {return -1;}
if (v1 > v2) {return 1;}
return 0;
// return (int)(v1-v2); may not work because of truncation
}

/*
gcc lines.c -o lines
./lines < unsorted.txt // created file "sort.txt"

./lines f
Usage : ./lines [-f field1] [-d field1] [-n field2]
Examples: ./lines -f+0 -d -n-1, ./lines -fd1 -n2
Optional arguments (field1 != field2):
"-f" - case insensitive sort (fold upper and lowercase)
"-d" - directory order sort (letters, numbers, blanks)
"-n" - numeric sort for lines or numbers within lines
field - integer, sort by a field (word) within each line
field > 0 - from the beginning of each line
field < 0 - from the end of each line
field = 0 or missing - sort (the rest of) each line

./lines -c
Illegal option: 'c'
Usage : ./lines [-f field1] [-d field1] [-n field2]
...

./lines -fdc
Illegal option: 'c'
Usage : ./lines [-f field1] [-d field1] [-n field2]
...

./lines -f < unsorted.txt // created file "fsort.txt"

./lines -d < unsorted.txt // created file "dsort.txt"

./lines -f -d -df < unsorted.txt // created file "fdsort.txt"

./lines -n < unsorted.txt // created file "nsort.txt"

./lines -f -d -n
"-f", "-d" work together, but not with "-n"
Usage : ./lines [-f field1] [-d field1] [-n field2]
...

./lines -fd1 -n1
"-f", "-d" cannot overlap with "-n" over the same field
Usage : ./lines [-f field1] [-d field1] [-n field2]
...

./lines -n-1 < unsorted.txt // created file "nsort.txt"
// strings are sorted lexicographically, numbers are sorted numerically

./lines -f -n-1 < unsorted.txt // created file "fnsort.txt"
// case insensitive sort for strings

./lines -d -n-1 < unsorted.txt // created file "dnsort.txt"
// directory order sort for strings

./lines -df -n-1 < unsorted.txt // created file "fdnsort.txt"
./lines -fd0 -n-1 < unsorted.txt // created file "fdnsort.txt"
// case insensitive, directory order sort for strings

rm *sort.txt // clean
*/





Notes:

You may need to redefine MAXLINES and MAXLEN if you use other input file.
File fdnsort.txt looks almost like an index (see the Index_program).









Chapter_5     Exercise_5-16 BACK_TO_TOP Index_program     Exercise_5-18



Comments

Popular posts from this blog

Contents

Blogger Page Margins in Contempo