Exercise 5-20-1 (Expanded declare - Decode, describe declarations)
Chapter_5 Exercise_5-19 Expanded_declarations | Exercise_5-20-2 |
Exercise 5-20 K&R, p. 126
Exercise 5-20. Expand dcl to handle declarations with function argument types, qualifiers like const, and so on.
Note: See Expanded_declarations for the text files.
edcl.c download
#include <stdio.h> // for printf(), getchar(), EOF
#include <string.h> // for strcat(), strcpy(), strcmp()
#include <ctype.h> // for isalpha(), isalnum(), isspace()
// TODO: Add references
/*
dcl: datatype optional modifiers optional *s direct-dcl
direct-dcl: name
(dcl)
direct-dcl(optional arguments)
direct-dcl[optional size]
arguments: argument
argument, arguments
arguments, ...
argument: dcl with optional name
size: integral type
*/
#define FALSE 0
#define TRUE 1
// Array dimensions used by the parser state below.
// Example of a long type: static const short unsigned int (5 strings or words)
#define MAXNODTMODQ 10 // max no of data type words plus modifiers, qualifiers (per level)
#define MAXLVLREC 100 // max level of recursion, int f(char g(long double d))
#define MAXNOP 100 // max no of pointers to a datatype, array, or function (per level)
#define MAXTOKEN 100 // size of a token buffer, including the terminating '\0'
#define MAXOUTPUT 5000 // size of the output buffer, including the terminating '\0'
// Token types returned by gettoken() in addition to plain character codes:
// STRING - data type, name or modifier, ELLIPSIS - variadic arguments, ...
// PARENS - "()", BRACKETS - "[...]" (the text is kept in token[])
enum {STRING, PARENS, BRACKETS, POINTFA, ARGUMENTS, ELLIPSIS};
// POINTFA - '(' followed by pointer to function or array
// ARGUMENTS - '(' followed by function arguments (parameters in a declaration)
// NOTE(review): these enumerators have the values 0..5, which are also valid
// (control) character codes that gettoken() can return unchanged.
#define ARGSEP ',' // argument separator
// Reserved words: dcl() treats the last word read as a name only if it is
// not found in this table.
char *DTMODQ[] = // data types, modifiers, and qualifiers
{"char", "signed", "unsigned", "short", "int", "long", "float", "double",
"const", "static", "volatile", "register", "void", "" // empty string marks the end
}; // reserved words cannot be used as names
void dcl(void); // parse a declarator
void dirdcl(void); // parse a direct declarator
int gettoken(void); // read the next token into token[], set and return tokentype
int VERBOSE = FALSE; // set by the -v option: words instead of symbols in the output
int ARG = FALSE; // argument; set by dirdcl() when an argument list is entered
// NOTE(review): ARG is written in main() and dirdcl() but not read in this file.
int FOUNDNAME = FALSE; // TRUE once a name has been read on the current line
int tokentype; // type of last token
char token[MAXTOKEN]; // last token string
// data type words, indexed by [recursion level][word index]:
char datatype[MAXLVLREC][MAXNODTMODQ][MAXTOKEN];
// pointer qualifiers, const, volatile, indexed by [recursion level][star index]:
char pointqs[MAXLVLREC][MAXNOP][MAXTOKEN*2];
int levrec = 0; // level of recursion, first index of datatype[], pointqs[]
char name[MAXTOKEN]; // identifier name
char out[MAXOUTPUT]; // output string
char* trim(char *); // remove extra spaces, return pointer to new string
int main(int argc, char *argv[]) // convert declaration to words
{
int c;
while (--argc > 0 && (*++argv)[0] == '-') // optional argument
{
while (c = *++argv[0]) // -v, -vv, -v -vv, etc.
{
switch(c)
{
case 'v' :
VERBOSE = TRUE;
break;
default:
printf("Illegal option: '%c'\n", c);
printf("Usage: ./edcl [-v]\n");
printf("-v - verbose\n");
return 1; // exit main(), signalling error
}
}
}
if (argc) // if (argc > 0)
{
printf("Usage: ./edcl [-v]\n");
printf("-v - verbose\n");
return 1; // end program, signalling error
}
tokentype = 0; // initialize
while(tokentype != EOF)
{
FOUNDNAME = FALSE; // reset for each new line
ARG = FALSE; // reset
levrec = 0; // reset
name[0] = '\0'; // (re)set
token[0] = '\0'; // (re)set
out[0] = '\0'; // (re)set
dcl(); // parse line
if (tokentype != '\n' && tokentype != EOF)
{
printf("Syntax error\n");
token[0] = '\0'; // reset
while(gettoken() != '\n' && tokentype != EOF)
{ // move to the next line or end of file
strcat(out, token);
token[0] = '\0'; // reset
}
}
if (name[0] == 0 && out[0] == '\0')
{continue;} // empty line, nothing to print
// else
printf("%s\n", trim(out));
}
return 0;
}
void dcl(void) // parse a declarator
{
gettoken();
if(tokentype == '\n' || tokentype == EOF)
{
// printf("Expected declaration\n");
return;
}
if (tokentype == ELLIPSIS) // variadic arguments, ...
{return;} // return to caller, dirdcl()
// STRING - data type, modifier, qualifier, or name
if (tokentype != STRING && tokentype != '*') // (*pf)(), (*pa)[]
{ // token not a string, tokentype not '*'; (dcl) in dirdcl
printf("Expected data type or pointer\n"); // int i, *pf, *pa
return; // do not process the rest of line
}
int dti = 0; // datatype index, second dimension
// (no of words of current datatype)
while (tokentype == STRING) // read datatype and eventually the name
{ // get several strings as datatype and , modifiers
strcpy(datatype[levrec][dti++], token);
gettoken(); // first tokens on each line are data type, qualifiers, modifiers
}
int i;
int match = FALSE; // test if assumed name matches reserved words
if (dti > 0) // there is a datatype
{
for (i = 0; DTMODQ[i][0] != '\0'; i++) // ""[i][0] == '\0'
{ // the empty string "" ends the string array DTMODQ[]
if (strcmp(datatype[levrec][dti-1], DTMODQ[i]) == 0)
{ // should also check previous values stored in datatype[]
match = TRUE;
break;
}
}
if (!match) // found name
{ // modifiers, qualifiers are reserved words, name is not
strcpy(name, datatype[levrec][dti-1]);
strcat(out, name); // out[] may not be empty, so we cannot use strcpy()
if (VERBOSE) {strcat(out, ": ");}
FOUNDNAME = TRUE;
dti--; // to account for the removed name
if (dti == 0) // datatype should precede the name
{
printf("Datatype is missing\n");
FOUNDNAME = FALSE; // reset
name[0] = '\0'; // reset
return; // do not process the rest of line
}
}
// if (match), datatype[] holds the datatype,
// modifiers, qualifiers, but not the name
if(tokentype == '\n' || tokentype == EOF)
{ // variable declaration, like 'int i'
for (i = 0; i < dti; i++)
{
strcat(out, " ");
strcat(out, datatype[levrec][i]); // don't put space at the end
}
if (!FOUNDNAME) // const static signed short int
{
printf("Name is missing\n");
}
FOUNDNAME = FALSE; // reset
name[0] = '\0'; // reset
return;
}
}
int ns = 0; // no of stars
int foundpn = FALSE; // found pointer name
while (tokentype == '*' && !foundpn) // count *s until pointer name
{
ns++;
pointqs[levrec][ns-1][0] = '\0'; // initialize
gettoken();
while (tokentype == STRING) // volatile const name
{
if (strcmp(token, "volatile") == 0 || strcmp(token, "const") == 0)
{ // do not write *const const volatile const p, write *const volatile p
strcat(pointqs[levrec][ns-1], " ");
strcat(pointqs[levrec][ns-1], token);
}
else // found name
{
strcpy(name, token);
strcat(out, name); // out[] may not be empty, so we cannot use strcpy()
if (VERBOSE) {strcat(out, ": ");}
FOUNDNAME = TRUE; // could also be an argument name
foundpn = TRUE; // break out of outer while()
gettoken();
break; // out of inner while()
}
gettoken();
}
}
if (tokentype == '\n' || tokentype == EOF)
{
while (ns-- > 0)
{
strcat(out, pointqs[levrec][ns]); // const volatile
if (VERBOSE)
{strcat(out, " pointer to");}
else {strcat(out, " *");}
}
for (i = 0; i < dti; i++)
{
strcat(out, " ");
strcat(out, datatype[levrec][i]); // don't put space at the end
}
FOUNDNAME = FALSE; // reset
name[0] = '\0'; // reset
return;
}
levrec++; // after reading current datatype, pointers qualifiers
// dirdcl() may call dcl(), which should have another levrec
dirdcl(); // recursive-descent call
levrec--; // upon writing current datatype, pointers qualifiers
while (ns-- > 0)
{
strcat(out, pointqs[levrec][ns]); // const volatile
if (VERBOSE)
{strcat(out, " pointer to");}
else {strcat(out, " *");}
}
for (i = 0; i < dti; i++)
{
strcat(out, " ");
strcat(out, datatype[levrec][i]); // don't put space at the end
}
}
// moved name processing to dcl(), along with reading
// data types, modifiers, and qualifiers, including for pointers
void dirdcl(void) // parse a direct declarator
{
if (tokentype == POINTFA) // (dcl): int (*pf)(), char (*pa)[]
{ // '(' followed by pointer to function or array
dcl(); // recursive-descent call
if (tokentype != ')')
{
printf("Error: missing ')'\n");
return; // do not process the rest of line
}
// else
gettoken();
}
if (!FOUNDNAME)
{
printf("Name is missing\n");
return; // do not process the rest of line
}
// name(), name[], (dcl)(), (dcl)[]
while(tokentype == PARENS || tokentype == BRACKETS)
{ // last token should be '\n' or EOF
if (tokentype == PARENS)
{
if (VERBOSE)
{strcat(out, " function returning");}
else{strcat(out, " ()");}
}
else // BRACKETS
{
if (VERBOSE)
{
strcat(out, " array");
strcat(out, token); // token holds [...]
strcat(out, " of");
}
else
{
strcat(out, " ");
strcat(out, token); // token holds [...]
}
}
gettoken();
}
if (tokentype == ARGUMENTS) // function with arguments
{
ARG = TRUE; // arguments have optional names
// ARG remains set (TRUE) till the end of line
if (VERBOSE)
{strcat(out, " function taking as arguments: (");}
else {strcat(out, " (");}
dcl();
while(tokentype == ARGSEP)
{
strcat(out, ", ");
dcl();
}
if (tokentype == ELLIPSIS)
{
strcat(out, token); // "..."
gettoken();
}
if (tokentype != ')')
{
printf("Error: missing ')'\n");
return; // do not process the rest of line
}
// else
if (VERBOSE) {strcat(out, ") and returning");}
else {strcat(out, ")");}
gettoken();
}
}
int getch(void);
void ungetch(int);
int gettoken(void)
{
int c;
char *p = token;
while((c = getch()) == ' ' || c == '\t')
{} // skip beginning whitespace
if (c == '(')
{
while((c = getch()) == ' ' || c == '\t')
{} // skip whitespace
if (c == ')')
{
strcpy(token, "()");
return tokentype = PARENS;
}
else
{
ungetch(c);
if (c == '*') // pointer to function or array
{return tokentype = POINTFA;}
// else // function arguments (parameters in a declaration)
return tokentype = ARGUMENTS;
}
}
else if (c == ',') // argument separator
{
return tokentype = ARGSEP;
}
else if (c == '.')
{
if ((c = getch()) != '.' || (c = getch()) != '.')
{
printf("Expected ellipsis, ...\n");
return tokentype = c;
}
else
{
strcpy(token, "...");
return tokentype = ELLIPSIS;
}
}
else if (c == '[')
{
for (*p++ = c; isalnum(c = getch()); ) // isdigit() or 0x, then isxdigit()
{ // token holds [...]
*p++ = c;
}
if (c != ']')
{
printf("Expected ']'\n");
*p = '\0'; // end token string
return tokentype = c;
}
// else
*p++ = c; // ']'
*p = '\0'; // end token string
return tokentype = BRACKETS;
}
else if (isalpha(c)) // letter, not underscore
{ // name or modifier or qualifier
for (*p++ = c; isalnum(c = getch()); ) // letter or digit (decimal or hex)
{*p++ = c;}
*p = '\0'; // end string
ungetch(c);
return tokentype = STRING;
}
else {return tokentype = c;} // could be '*' or ')' or '\n' or EOF
}
// One-slot push-back buffer shared by getch() and ungetch().
// EOF-1 means "empty": it is neither a character code nor EOF, so even
// EOF itself can be pushed back.
int buf = EOF-1;

// getch: return the pushed-back character if there is one (and empty the
// buffer), otherwise read the next character from standard input.
int getch(void)
{
    int pending = buf;

    if (pending >= EOF) // something was pushed back
    {
        buf = EOF-1; // mark the buffer empty again
        return pending;
    }
    return getchar();
}

// ungetch: push one character back on input, making it the result of the
// next getch(). Only one character is remembered; a second call before
// getch() overwrites the first.
void ungetch(int c)
{
    buf = c;
}
/*
 * trim: squeeze whitespace in s, in place.
 *
 * - whitespace directly after '(' or '[' is removed;
 * - every other run of whitespace is replaced by a single space
 *   (this includes a run at the very start or very end of the string).
 *
 * s must be a modifiable, NUL-terminated string. Returns s.
 */
char* trim(char *s) // remove extra spaces, return pointer to new string
{
    char *p = s; // read position
    char *r = s; // write position, never ahead of p

    while (*p != '\0')
    {
        // <ctype.h> functions need an unsigned char value (or EOF); a
        // plain char may be negative for bytes >= 0x80, hence the casts.
        if (*p == '(' || *p == '[')
        {
            *r++ = *p++;
            while (isspace((unsigned char)*p)) {p++;} // stops at '\0' too
        }
        else if (isspace((unsigned char)*p))
        {
            *r++ = ' '; // keep only one space
            while (isspace((unsigned char)*p)) {p++;}
        }
        else
        {
            *r++ = *p++;
        }
    }
    *r = '\0'; // end string
    return s;
}
/*
gcc edcl.c -o edcl
./edcl // Enter (input from keyboard)
char (*apf[])() // Enter
apf [] * () char
char **argv // Enter
argv * * char
int (*daytab)[13] // Enter
daytab * [13] int
int *daytab[13] // Enter
daytab [13] * int
int (**f)() // Enter
f * * () int
int (*const volatile const *const f)() // Enter
f const * const volatile const * () int
// Ctrl^D in Linux, Ctrl^Z+Enter in Windows (EOF)
./edcl -v // Enter (input from keyboard)
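char (*apf[])() // Enter
apf: array[] of pointer to function returning char
char **argv // Enter
argv: pointer to pointer to char
int (*daytab)[13] // Enter
daytab: pointer to array[13] of int
int *daytab[13] // Enter
daytab: array[13] of pointer to int
int (**f)() // Enter
f: pointer to pointer to function returning int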
int (*const volatile const *const f)() // Enter
f: const pointer to const volatile const pointer to function returning int
// Ctrl^D in Linux, Ctrl^Z+Enter in Windows (EOF)
./edcl // test error recovery
1 // Enter
Expected data type or pointer
Syntax error
char **argv // Enter
argv * * char
. // Enter
Expected ellipsis, ...
.. // Enter
Expected ellipsis, ...
... // Enter
Syntax error
int i // Enter
i int
f() // Enter
Datatype is missing
Syntax error
f // name has been reset, it is not argv (from previous line)
int *f() // Enter
f () * int
char f(() // Enter
Expected data type or pointer
Error: missing ')'
Syntax error
f (char
int i // Enter
i int
int *f()) // Enter
Syntax error
f () * int
char *argv[] // Enter
argv [] * char
./edcl -v // test error recovery
1 // Enter
Expected data type or pointer
Syntax error
char **argv // Enter
argv * * char
. // Enter
Expected ellipsis, ...
.. // Enter
Expected ellipsis, ...
... // Enter
Syntax error
int i // Enter
i: int
f() // Enter
Datatype is missing
Syntax error
f // name has been reset, it is not argv (from previous line)
int *f() // Enter
f () * int
char f(() // Enter
Expected data type or pointer
Error: missing ')'
Syntax error
f (char
int i // Enter
i int
int *f()) // Enter
Syntax error
f () * int
char *argv[] // Enter
argv [] * char
./edcl < edcl.txt > "unedcl(copy).txt"
diff -s unedcl.txt "unedcl(copy).txt"
// Files unedcl.txt and unedcl(copy).txt are identical
meld unedcl.txt "unedcl(copy).txt"
// Files are identical
./edcl -v < edcl.txt > "expanded(copy).txt"
diff -s expanded.txt "expanded(copy).txt"
// Files expanded.txt and expanded(copy).txt are identical
meld expanded.txt "expanded(copy).txt"
// Files are identical
*/
Note: The file names "unedcl(copy).txt" and "expanded(copy).txt" are quoted because the shell would otherwise interpret the unquoted parentheses as subshell syntax.
Chapter_5 Exercise_5-19 Exp_decl | BACK_TO_TOP | Exercise_5-20-2 |
Comments
Post a Comment