Exercise 5-17 (lines - Field-handling sort, all options)
Chapter_5 Exercise_5-16 | Index_program Exercise_5-18 |
Exercise 5-17 K&R, p. 121
Exercise 5-17. Add a field-handling capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. (The index for this book was sorted with -df for the index category and -n for the page numbers.)
CONTENTS: unsorted.txt lines.c
unsorted.txt K&R, p. 263-271 (Index) download
call by value 95
command-line arguments 116
<< left shift operator 206
a.out 6
< less than operator 41
alloc function 101
a.out 70
<< left shift operator 49
call by reference 27
command-line arguments 118
<ctype.h> header 248
temperature conversion program 12
command-line arguments 114
<= less or equal operator 206
increment operator, ++ 203
temperature conversion program 9
default label 222
definition, function 69
pointer to function 201
#define, multi-line 89
call by value 202
++ increment operator 18
command-line arguments 115
call by value 27
\t tab character 193
++ increment operator 46
UNIX file system 169
++ increment operator 203
definition, function 25
< less than operator 206
#define with arguments 89
<ctype.h> header 43
increment operator, ++ 106
pointer, null 102
temperature conversion program 13
definition, function 225
pointer to function 118
increment operator, ++ 18
\t tab character 11
default label 58
increment operator, ++ 46
less than operator, < 206
pointer initialization 102
left shift operator, << 206
temperature conversion program 8
less than operator, < 41
pointer initialization 138
command-line arguments 117
++ increment operator 106
pointer to function 147
pointer, null 198
\t tab character 38
temperature conversion program 15
UNIX file system 179
\t tab character 8
<= less or equal operator 41
left shift operator, << 49
lines.c download
#include <stdio.h> // for getchar(). printf(), EOF, NULL, FILE, fopen(), fclose()
#include <string.h> // for strcat(), strcpy(), strcmp()
#include <ctype.h> // for isdigit(), isalpha(), isupper(), islower(), isalnum()
#define MAXLINES 5000 // max no of lines to be sorted
#define MAXLEN 1000 // max line length
#define MAXFNLEN 100 // max file name length
void initField(int *field, int c, char **s);
void printUsage(void);
int readlines (char *lineptr[], int nlines);
void writelines (char *lineptr[], int nlines, char *filename);
int strfcmp(char*, char*); // case insensitive (fold upper and lowercase)
int strdcmp(char*, char*); // directory order comparison
int strfdcmp(char*, char*); // case insensitive, directory order
int numcmp(char*, char*); // numeric comparison
void qSort1(void *lineptr[], int left, int right, // qsort() is declared in stdlib.h
int (*comp)(void*, void*)); // comparison function,
// strcmp(), strfcmp(), strdcmp(), strfdcmp(), or numcmp()
void qSort2(void *lineptr[], int left, int right, int *fields,
int (*lexComp)(void*, void*), int (*numComp)(void*, void*));
// lexComp() can be strcmp(), strfcmp(), strdcmp(), or strfdcmp()
// numComp() can be numcmp() or NULL
// first do lexicographic sort, then numeric sort on 'equal' lex compared lines
// if (strcmp(line1) == strcmp(line2)), perform numeric sort on a field
int main(int argc, char *argv[])
{ // sorting can be generalized to n fields
int fields[] = {0, 0}; // fields has size 2 (lexicographic and numeric)
// lexicographic sort: strcmp(), strfcmp(), strdcmp(), strfdcmp()
// fields within each line, delimited by ' ' and '\t',
// null fields means regular sort
int orders[3] = {0}; // alternative initialization
// orders: fold (case insensitive), directory, numeric
int c;
while (--argc > 0 && (*++argv)[0] == '-') // optional arguments
{
while (c = *++argv[0]) // -f -d1 -n-1, -f+0 -d -n-1, -fd1, -n2, etc.
{
switch(c)
{
case 'f' :
orders[0] = 1; // case insensitive (fold) order
initField(fields, c, argv); // &fields[0]
break;
case 'd' :
orders[1] = 1; // directory order
initField(fields, c, argv); // &fields[0]
break;
case 'n' :
orders[2] = 1; // numeric order
initField(fields+1, c, argv); // &fields[1]
break;
default:
printf("Illegal option: '%c'\n", c);
printUsage();
return 1; // end program, signalling error
}
}
}
if (argc) // if (argc > 0)
{
printUsage();
return 1; // end program, signalling error
}
if (orders[2] && !fields[1] && // numeric for the entire line
(orders[0] || orders[1]) && !fields[0]) // fold or dir or both for the entire line
{
printf("\"-f\", \"-d\" work together, but not with \"-n\"\n");
printUsage();
return 1; // end program, signalling error
}
if (fields[1] && (fields[0] == fields[1])) // fields[0] == fields[1] > 0
{ // fields[1] > 0 implies orders[2] >0,
// fields[0] > 0 implies (orders[0] || orders[1]) > 0
printf("\"-f\", \"-d\" cannot overlap with \"-n\" over the same field\n");
printUsage();
return 1; // end program, signalling error
}
char *lineptr[MAXLINES];
int nlines; // no of input lines read
char filename[MAXFNLEN];
int i = 0;
if (orders[0]) // case insensitive sort
{filename[i++] = 'f';}
if (orders[1]) // directory order sort
{filename[i++] = 'd';}
if (orders[2]) // numeric sort
{filename[i++] = 'n';}
filename[i] = '\0';
strcat(filename, "sort.txt");
int (*comp)(void*, void*);
int (*lexComp)(void*, void*), (*numComp)(void*, void*) = NULL;
if ((nlines = readlines(lineptr, MAXLINES)) >= 0)
{
if (fields[0] == 0 && fields[1] == 0) // regular sort:
{ // lexicographic or numeric sort for the entire line
if (orders[0] && orders[1]) // case insensitive, directory order
{comp = (int (*)(void*, void*))strfdcmp;}
else if (orders[0]) // case insensitive (fold)
{comp = (int (*)(void*, void*))strfcmp;}
else if (orders[1]) // directory order
{comp = (int (*)(void*, void*))strdcmp;}
else if (orders[2]) // numeric order
{comp = (int (*)(void*, void*))numcmp;}
else {comp = (int (*)(void*, void*))strcmp;}
qSort1((void**)lineptr, 0, nlines-1, comp);
}
else
{
if (orders[0] && orders[1]) // case insensitive, directory order
{lexComp = (int (*)(void*, void*))strfdcmp;}
else if (orders[0]) // case insensitive (fold)
{lexComp = (int (*)(void*, void*))strfcmp;}
else if (orders[1]) // directory order
{lexComp = (int (*)(void*, void*))strdcmp;}
else {lexComp = (int (*)(void*, void*))strcmp;}
if (orders[2]) // numeric order
{numComp = (int (*)(void*, void*))numcmp;}
qSort2((void**)lineptr, 0, nlines-1, fields, lexComp, numComp);
}
writelines(lineptr, nlines, filename);
}
else
{
printf("Error: input too big to sort\n");
return 1; // exit program, signalling error
}
return 0;
}
void initField(int *field, int c, char **s)
{
c = *++s[0];
if (c == '\0')
{
--s[0]; // ungetch()
return;
}
if (c != '+' && c != '-' && !isdigit(c))
{
--s[0]; // ungetch()
return;
}
int sign = 1;
if (c == '-') {sign = -1; c = *++s[0];}
else if (c == '+') {c = *++s[0];} // move past sign
if (!isdigit(c))
{
--s[0]; // ungetch()
return;
}
*field = 0; // *field may have been set before
while (isdigit(c))
{
*field = *field * 10 + (c - '0');
c = *++s[0];
}
*field *= sign;
--s[0]; // ungetch()
}
void printUsage(void)
{
printf("Usage : ./lines [-f field1] [-d field1] [-n field2]\n");
printf("Examples: ./lines -f+0 -d -n-1, ./lines -fd1 -n2\n");
printf("Optional arguments (field1 != field2):\n");
printf("\"-f\" - case insensitive sort (fold upper and lowercase)\n");
printf("\"-d\" - directory order sort (letters, numbers, blanks)\n");
printf("\"-n\" - numeric sort for lines or numbers within lines\n");
printf("field - integer, sort by a field (word) within each line\n");
printf("field > 0 - from the beginning of each line\n");
printf("field < 0 - from the end of each line\n");
printf("field = 0 or missing - sort (the rest of) each line\n");
}
int getLine(char *, int); // getline() is declared in stdio.h
char* alloc(int); // allocate storage
int readlines (char *lineptr[], int maxlines) // read input lines
{
int len; // length of line
int nlines; // no of input lines read
char *p, line[MAXLEN];
nlines = 0;
while ((len = getLine(line, MAXLEN)) > 0)
{
if(nlines >= maxlines || (p = alloc(len+1)) == NULL)
{return -1;} // len+1 for ending '\0'
else
{
strcpy(p, line);
lineptr[nlines++] = p;
}
}
return nlines;
}
int getLine(char *s, int lim)
{
int c = EOF; // initialize
char *p;
// getchar() is only executed if ((p-s) < (lim-1)):
for (p = s; (p-s) < (lim-1) && (c = getchar()) != EOF && c != '\n'; p++)
{ // from 0 to lim-2; s[lim-2]='\n', s[lim-1]='\0'
*p = c;
}
if (c == '\n')
{*p++ = c;}
*p = '\0'; // the null character ends a string
return p-s; // max(p-s) == (lim-1)
}
#define ALLOCSIZE (MAXLINES * MAXLEN)
static char allocbuf[ALLOCSIZE]; // storage for alloc()
static char *allocp = allocbuf; // next free position
char* alloc(int n) // return pointer to n chars
{
if (allocbuf + ALLOCSIZE - allocp >= n) // it fits
{
allocp += n;
return allocp-n; // old pointer
}
else {return 0;} // null pointer
}
void afree(char *p) // free storage pointed to by p
{
if (p >= allocbuf && p < allocbuf + ALLOCSIZE)
{allocp = p;}
}
void writelines (char *lineptr[], int nlines, char *filename) // write output lines
{
FILE *f = fopen(filename, "w"); // open for writing
while (nlines-- > 0)
{fprintf(f, "%s", *lineptr++);}
fclose(f);
}
void swap(void *v[], int i, int j); // interchange v[i], v[j]
// sort v[left] ... v[right] in increasing order
void qSort1(void *v[], int left, int right, int (*comp)(void*, void*))
{// qsort() is declared in stdlib.h
int i, last;
if (left >= right) // do nothing if array contains fewer than 2 elements
{return;} // stopping condition for recursion
swap(v, left, (left + right) / 2); // move partition element to v[left]
last = left; // start moving elements to last position
for (i = left + 1; i <= right; i++)
{
if (comp(v[i], v[left]) < 0) // v[i] < v[left]
{swap(v, ++last, i);} // smaller to the left, bigger to the right
}
swap(v, left, last); // return partition element to the middle position
qSort1(v, left, last-1, comp); // sort smaller elements
qSort1(v, last+1, right, comp); // sort bigger elements
}
// extract substrings from str based on fields
void subStr(char *str, char *substr1, char *substr2, int *fields);
// sort v[left] ... v[right] in increasing order
void qSort2(void *v[], int left, int right, int *fields,
int (*lexComp)(void*, void*), int (*numComp)(void*, void*))
{ // lexComp() can be strcmp(), strfcmp(), strdcmp(), or strfdcmp()
int i, last; // numComp() can be numcmp() or NULL
if (left >= right) // do nothing if array contains fewer than 2 elements
{return;} // stopping condition for recursion
swap(v, left, (left + right) / 2); // move partition element to v[left]
last = left; // start moving elements to last position
int c; // comparison result
char sub1[MAXLEN], sub2[MAXLEN]; // allocate space for substrings
char subL1[MAXLEN], subL2[MAXLEN]; // substrings of v[i], v[left]
for (i = left + 1; i <= right; i++)
{
subStr(v[i], sub1, sub2, fields);
subStr(v[left], subL1, subL2, fields);
c = lexComp(sub1, subL1);
if (c < 0) // sub1 < subL1
{swap(v, ++last, i);} // smaller to the left, bigger to the right
else if (c == 0 && numComp != NULL) // sub1 == subL1, numcmp
{
c = numComp(sub2, subL2); // numcmp(sub2, subL2)
if (c < 0) // sub2 < subL2
{swap(v, ++last, i);} // smaller to the left, bigger to the right
}
}
swap(v, left, last); // return partition element to the middle position
qSort2(v, left, last-1, fields, lexComp, numComp); // sort smaller elements
qSort2(v, last+1, right, fields, lexComp, numComp); // sort bigger elements
}
void swap(void *v[], int i, int j) // interchange v[i], v[j]
{
void *temp;
temp = v[i]; // swap pointers
v[i] = v[j];
v[j] = temp;
}
#include <stdlib.h> // for abs(), atof()
// extract substrings from str based on fields
void subStr(char *str, char *substr1, char *substr2, int *fields)
{ // fields[0] != fields[1] >= 0, words separated by ' ', '\t'
char copy[MAXLEN]; // used by strtok()
strcpy(copy, str); // save str, strtok() destroys copy[]
char sep[] = " \t"; // {' ', '\t', '\0'} // separators
char *token = strtok(copy, sep);
if (token == NULL) // empty line
{
substr1[0] = '\0';
substr2[0] = '\0';
return;
}
// here token != NULL
char *tokens[MAXLEN];
int i = 0;
while (token != NULL)
{
tokens[i++] = token;
token = strtok(NULL, sep);
}
// i holds the length of tokens[] (no of tokens)
if (abs(fields[0]) > i) // field[0] greater than no of words
{
substr1[0] = '\0';
if (abs(fields[1]) > i) // field[1] greater than no of words
{
substr2[0] = '\0';
return;
} // here abs(fields[1]) <= i
if (fields[1] == 0) // hold the (rest of the) line
{
strcpy(substr2, str);
} // here fields[1] != 0
else if (fields[1] > 0)
{strcpy(substr2, tokens[fields[1]-1]);}
else // fields[1] < 0
{strcpy(substr2, tokens[i+fields[1]]);} // i-abs(fields[1])
return;
} // here abs(fields[0]) <= i
if (abs(fields[1]) > i) // field[1] greater than no of words
{
substr2[0] = '\0';
if (fields[0] == 0) // hold the (rest of the) line
{
strcpy(substr1, str);
} // here fields[0] != 0
else if (fields[0] > 0)
{strcpy(substr1, tokens[fields[0]-1]);}
else // fields[0] < 0
{strcpy(substr1, tokens[i+fields[0]]);} // i-abs(fields[0])
return;
}
// here abs(fields[0]) <= i, abs(fields[1]) <= i
int j, index;
if (fields[0] == 0) // fields[1] != 0
{ // numeric sort for one part, lexicographic for the rest of string
if (fields[1] > 0)
{index = fields[1]-1;}
else // fields[1] < 0
{index = i+fields[1];} // i-abs(fields[1])
strcpy(substr2, tokens[index]);
substr1[0] = '\0';
for (j = 0; j < i; j++)
{
if (j != index)
{
strcat(substr1, tokens[j]); // tokens do not contain delimiters ' ', '\t'
if (j < i-1) {strcat(substr1," ");} // space necessary for directory order
}
}
}
else if (fields[1] == 0) // fields[0] != 0
{ // lexicographic sort for one part, numeric for the rest of string
if (fields[0] > 0)
{index = fields[0]-1;}
else // fields[0] < 0
{index = i+fields[0];} // i-abs(fields[0])
strcpy(substr1, tokens[index]);
substr2[0] = '\0';
for (j = 0; j < i; j++)
{
if (j != index)
{
strcat(substr2, tokens[j]); // tokens do not contain delimiters ' ', '\t'
if (j < i-1) {strcat(substr2," ");} // space necessary for directory order
}
}
}
else // (fields[0] != 0) != (fields[1] != 0)
{ // lexicographic and numeric sort each for one part of the string
if (fields[0] > 0)
{strcpy(substr1, tokens[fields[0]-1]);}
else // fields[0] < 0
{strcpy(substr1, tokens[i+fields[0]]);} // i-abs(fields[0])
if (fields[1] > 0)
{strcpy(substr2, tokens[fields[1]-1]);}
else // fields[1] < 0
{strcpy(substr2, tokens[i+fields[1]]);} // i-abs(fields[1])
}
}
int strfcmp(char *s1, char *s2) // case insensitive comparison
{ // (fold uppercase and lowercase together)
int i1, i2;
for ( ; *s1 && *s2; s1++, s2++) // (*s1 != '\0') && (*s2 != '\0')
{
if (*s1 != *s2)
{
if (isalpha(*s1) && isalpha(*s2))
{
if (isupper(*s1) && isupper(*s2) || islower(*s1) && islower(*s2))
{return *s1 - *s2;}
i1 = *s1 - (isupper(*s1) ? 'A' : 'a'); // here one of *s1, *s2 is upper,
i2 = *s2 - (isupper(*s2) ? 'A' : 'a'); // the other is lower
if (i1 != i2) {return i1 - i2;} // else continue;
}
else {return *s1 - *s2;}
} // else continue;
}
return *s1 - *s2; // *s1 == '\0', *s2 == '\0', or both
}
int strdcmp(char *s1, char *s2) // directory order comparison
{
for ( ; *s1 && *s2; s1++, s2++) // (*s1 != '\0') && (*s2 != '\0')
{ // compare only letters, numbers, blanks
while (*s1 && *s1 != ' ' && !isalnum(*s1)) {s1++;}
while (*s2 && *s2 != ' ' && !isalnum(*s2)) {s2++;}
if (*s1 != *s2)
{ // space is ASCII 32 < digits, letters (alphanumeric)
return *s1 - *s2;
} // else continue;
}
return *s1 - *s2; // *s1 == '\0', *s2 == '\0', or both
}
int strfdcmp(char *s1, char *s2) // case insensitive, directory order
{
int i1, i2;
for ( ; *s1 && *s2; s1++, s2++) // (*s1 != '\0') && (*s2 != '\0')
{ // compare only letters, numbers, blanks
while (*s1 && *s1 != ' ' && !isalnum(*s1)) {s1++;}
while (*s2 && *s2 != ' ' && !isalnum(*s2)) {s2++;}
if (*s1 != *s2)
{ // space is ASCII 32 < digits, letters (alphanumeric)
if (!isalpha(*s1) || !isalpha(*s2) ||
isupper(*s1) && isupper(*s2) || islower(*s1) && islower(*s2))
{return *s1 - *s2;}
i1 = *s1 - (isupper(*s1) ? 'A' : 'a'); // here one of *s1, *s2 is upper,
i2 = *s2 - (isupper(*s2) ? 'A' : 'a'); // the other is lower
if (i1 != i2) {return i1 - i2;} // else continue;
} // else continue;
}
return *s1 - *s2; // *s1 == '\0', *s2 == '\0', or both
}
int numcmp(char *s1, char *s2) // numeric comparison
{
double v1, v2;
v1 = atof(s1);
v2 = atof(s2);
if (v1 < v2) {return -1;}
if (v1 > v2) {return 1;}
return 0;
// return (int)(v1-v2); may not work because of truncation
}
/*
gcc lines.c -o lines
./lines < unsorted.txt // created file "sort.txt"
./lines f
Usage : ./lines [-f field1] [-d field1] [-n field2]
Examples: ./lines -f+0 -d -n-1, ./lines -fd1 -n2
Optional arguments (field1 != field2):
"-f" - case insensitive sort (fold upper and lowercase)
"-d" - directory order sort (letters, numbers, blanks)
"-n" - numeric sort for lines or numbers within lines
field - integer, sort by a field (word) within each line
field > 0 - from the beginning of each line
field < 0 - from the end of each line
field = 0 or missing - sort (the rest of) each line
./lines -c
Illegal option: 'c'
Usage : ./lines [-f field1] [-d field1] [-n field2]
...
./lines -fdc
Illegal option: 'c'
Usage : ./lines [-f field1] [-d field1] [-n field2]
...
./lines -f < unsorted.txt // created file "fsort.txt"
./lines -d < unsorted.txt // created file "dsort.txt"
./lines -f -d -df < unsorted.txt // created file "fdsort.txt"
./lines -n < unsorted.txt // created file "nsort.txt"
./lines -f -d -n
"-f", "-d" work together, but not with "-n"
Usage : ./lines [-f field1] [-d field1] [-n field2]
...
./lines -fd1 -n1
"-f", "-d" cannot overlap with "-n" over the same field
Usage : ./lines [-f field1] [-d field1] [-n field2]
...
./lines -n-1 < unsorted.txt // created file "nsort.txt"
// strings are sorted lexicographically, numbers are sorted numerically
./lines -f -n-1 < unsorted.txt // created file "fnsort.txt"
// case insensitive sort for strings
./lines -d -n-1 < unsorted.txt // created file "dnsort.txt"
// directory order sort for strings
./lines -df -n-1 < unsorted.txt // created file "fdnsort.txt"
./lines -fd0 -n-1 < unsorted.txt // created file "fdnsort.txt"
// case insensitive, directory order sort for strings
rm *sort.txt // clean
*/
Notes:
You may need to redefine MAXLINES and MAXLEN if you use other input file.
File fdnsort.txt looks almost
like an index (see the
Index_program).
Chapter_5 Exercise_5-16 | BACK_TO_TOP | Index_program Exercise_5-18 |
Comments
Post a Comment