/*
* c2html.c
*
* Converts c and c++ code into HTML for publishing on the WWW
* Copyright (C) 1996-1999 Christopher Kohlhoff (chris@kohlhoff.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#if defined(HAVE_DIR_H)
#include <dir.h>
#endif
/*********************************************************************
* Structures and Definitions
*/
/* Maximum number of characters buffered for one identifier in
   ConvertCodeToHtml (the buffer is MAXIDENT + 1 bytes, leaving room
   for the terminating NUL). */
#define MAXIDENT 512
/* Size in bytes of the line buffer used by StringArrayRead. */
#define MAXLINE 512
/* Initial size and growth step, in bytes, of the buffer built by
   SchemeReadKey. */
#define KEYCHUNK 16
/* Initial size and growth step, in bytes, of the buffer built by
   SchemeReadValue. */
#define VALUECHUNK 32
/* StringArray structure for a sorted list of keywords */
typedef struct StringArray
{
char **strings; /* heap-allocated array of heap-allocated, NUL-terminated strings */
int num; /* number of entries of `strings' currently in use */
}
StringArray;
/* Highlighting scheme to be used */
typedef struct Scheme
{
int tabwidth; /* tab stops fall on multiples of this column count (see OutputChar) */
char *keywords; /* name of the keywords file handed to StringArrayRead */
char *space; /* text written for each space, and for each column of an expanded tab */
char *newline; /* text written for each '\n' */
char *specialchar; /* printf-style format used for '<', '>', '&' and '|';
                      the character is passed as its only argument */
char *code_b; /* written once before any converted code */
char *code_e; /* written once after all converted code */
char *comment_b; /* written before a comment */
char *comment_e; /* written after a comment */
char *directive_b; /* written before a preprocessor directive */
char *directive_e; /* written after a preprocessor directive */
char *constant_b; /* written before a number, character or string constant */
char *constant_e; /* written after a number, character or string constant */
char *keyword_b; /* written before an identifier found in the keywords list */
char *keyword_e; /* written after an identifier found in the keywords list */
}
Scheme;
/* Output file and related context */
typedef struct OutputFile
{
Scheme scheme; /* highlighting strings and settings used while writing */
FILE *file; /* stream that receives the generated output */
int column; /* number of characters written on the current output line;
               reset to 0 at each newline and used for tab expansion */
}
OutputFile;
/*********************************************************************
* Function declarations
*/
/* Runs the highlighting state machine over the file `filename';
   returns 1 on success and 0 on failure. */
int ConvertCodeToHtml(const char *filename, OutputFile *output);
/* Output helpers: OutputChar and OutputString apply the scheme's
   character substitutions, OutputFormat writes `str' unchanged. */
void OutputChar(char c, OutputFile *output);
void OutputString(const char *str, OutputFile *output);
void OutputFormat(const char *str, OutputFile *output);
/* StringArray functions */
int StringArrayRead(StringArray *array, const char *filename);
char *StringArrayFind(StringArray *array, const char *match);
int StringArrayCmp(const void *a, const void *b);
/* Opens `fname' as given and, if that fails, looks for it along PATH. */
FILE *FOpenOnPath(const char *fname, const char *mode);
/* Scheme functions */
int SchemeRead(Scheme * scheme, const char *filename);
/* Both readers return a heap-allocated string that the caller frees. */
char *SchemeReadKey(FILE *fp);
char *SchemeReadValue(FILE *fp);
/*********************************************************************
* Program entry point
*/
/*
 * Usage: c2html <filename> [<scheme file>]
 *
 * Loads the highlighting scheme (argv[2], or "c2html.scheme" when no
 * scheme file is given), then converts argv[1] and writes the result
 * to stdout.
 *
 * Returns EXIT_SUCCESS when the conversion succeeds, EXIT_FAILURE on a
 * usage error, an unreadable scheme file or a failed conversion.
 */
int
main(int argc, char *argv[])
{
  OutputFile output;
  const char *schemeName;

  if (argc < 2)
    {
      fprintf(stderr, "Usage: c2html <filename> [<scheme file>]\n");
      return EXIT_FAILURE;
    }

  /* Initialise the output file.  The scheme must have been read
     successfully before it is used: carrying on after a failure would
     hand ConvertCodeToHtml a scheme that was never filled in.
     SchemeRead reports the failure itself. */
  schemeName = (argc > 2) ? argv[2] : "c2html.scheme";
  if (!SchemeRead(&output.scheme, schemeName))
    return EXIT_FAILURE;

  output.file = stdout;
  output.column = 0;

  if (!ConvertCodeToHtml(argv[1], &output))
    return EXIT_FAILURE;

  return EXIT_SUCCESS;
}
/*********************************************************************
* Machine States
*/
typedef enum State
{
NORMAL,
NORMAL_LINE_START,
HALF_COMMENT,
C_COMMENT,
END_C_COMMENT,
CPP_COMMENT,
DIRECTIVE_HALF_COMMENT,
DIRECTIVE_C_COMMENT,
DIRECTIVE_END_C_COMMENT,
SINGLE_STRING,
SINGLE_STRING_ESC,
DOUBLE_STRING,
DOUBLE_STRING_ESC,
DIRECTIVE,
DIRECTIVE_LINE_WRAP,
NUMBER_BEFORE_DOT,
NUMBER_AFTER_DOT,
IDENTIFIER
}
State;
/*********************************************************************
* ConvertCodeToHtml
*
* A State Machine that turns C or C++ code into HTML
*/
/*
 * Parameters:
 *   filename - path of the C or C++ source file to read.
 *   output   - destination stream, highlighting scheme and column
 *              counter; output->scheme.keywords names the keywords
 *              file that is loaded here.
 *
 * Returns 1 on success, or 0 (after printing a message to stderr) when
 * the input file or the keywords file cannot be read.
 *
 * Every input character is dispatched through the state machine.  A
 * state that held a '/' back because it might open a comment, and then
 * finds that it does not, writes the held '/' and has the current
 * character dispatched again in the state it falls back to, so that
 * the character is still classified (newline, identifier start, ...).
 */
int
ConvertCodeToHtml(const char *filename, OutputFile *output)
{
  int c;                    /* the character just read */
  State state;              /* the current state of the machine */
  int emit;                 /* should we write the character just read */
  int reprocess;            /* dispatch `c' again in the new state */
  char ident[MAXIDENT + 1]; /* the current identifier being read */
  int nident = 0;           /* the position in the current identifier */
  StringArray keywords;     /* the array of keywords */
  int finished;             /* whether we have finished processing the input */
  int i;
  FILE *fp;

  ident[0] = '\0';

  /* open the input FILE */
  fp = fopen(filename, "rt");
  if (fp == NULL)
    {
      fprintf(stderr, "Unable to open input file %s\n", filename);
      return 0;
    }

  /* read in the keywords */
  if (!StringArrayRead(&keywords, output->scheme.keywords))
    {
      fprintf(stderr, "Unable to read keywords file %s\n", output->scheme.keywords);
      fclose(fp);               /* do not leak the input stream */
      return 0;
    }

  OutputFormat(output->scheme.code_b, output);

  /* state-machine to output the code */
  state = NORMAL_LINE_START;
  finished = 0;
  while (!finished)
    {
      c = fgetc(fp);

      /* treat EOF as '\n' to ensure any current highlighting is ended */
      if (c == EOF)
        {
          c = '\n';
          finished = 1;
        }

      emit = 1;
      do
        {
          reprocess = 0;
          switch (state)
            {
            case NORMAL_LINE_START:
              if (c == '#')
                {
                  OutputFormat(output->scheme.directive_b, output);
                  state = DIRECTIVE;
                  break;
                }
              else if (!isspace(c))
                state = NORMAL;
              /* fall through */

            case NORMAL:
              if (c == '\n')
                state = NORMAL_LINE_START;
              else if (c == '/')
                {
                  /* hold the '/' back until we know whether it opens a comment */
                  state = HALF_COMMENT;
                  emit = 0;
                }
              else if (isdigit(c))
                {
                  OutputFormat(output->scheme.constant_b, output);
                  state = NUMBER_BEFORE_DOT;
                }
              else if (isalpha(c) || c == '_')
                {
                  nident = 0;
                  ident[nident++] = (char) c;
                  ident[nident] = '\0';
                  state = IDENTIFIER;
                  emit = 0;
                }
              else if (c == '\'')
                {
                  OutputFormat(output->scheme.constant_b, output);
                  state = SINGLE_STRING;
                }
              else if (c == '\"')
                {
                  OutputFormat(output->scheme.constant_b, output);
                  state = DOUBLE_STRING;
                }
              break;

            case HALF_COMMENT:
              if (c == '/')
                {
                  OutputFormat(output->scheme.comment_b, output);
                  OutputChar('/', output);
                  state = CPP_COMMENT;
                }
              else if (c == '*')
                {
                  OutputFormat(output->scheme.comment_b, output);
                  OutputChar('/', output);
                  state = C_COMMENT;
                }
              else
                {
                  /* plain '/' (e.g. division): write the held character
                     and classify the current one as ordinary code */
                  OutputChar('/', output);
                  state = NORMAL;
                  reprocess = 1;
                }
              break;

            case C_COMMENT:
              if (c == '*')
                state = END_C_COMMENT;
              break;

            case END_C_COMMENT:
              if (c == '/')
                {
                  OutputChar((char) c, output);
                  OutputFormat(output->scheme.comment_e, output);
                  state = NORMAL;
                  emit = 0;
                }
              else if (c != '*')
                state = C_COMMENT;
              break;

            case CPP_COMMENT:
              if (c == '\n')
                {
                  OutputFormat(output->scheme.comment_e, output);
                  state = NORMAL_LINE_START;
                }
              break;

            case DIRECTIVE_HALF_COMMENT:
              if (c == '/' || c == '*')
                {
                  /* a comment interrupts the directive highlighting */
                  OutputFormat(output->scheme.directive_e, output);
                  OutputFormat(output->scheme.comment_b, output);
                  OutputChar('/', output);
                  state = (c == '/') ? CPP_COMMENT : DIRECTIVE_C_COMMENT;
                }
              else
                {
                  /* plain '/' inside the directive: write it, then let
                     DIRECTIVE see the current character so a newline or
                     backslash right after the '/' is not missed */
                  OutputChar('/', output);
                  state = DIRECTIVE;
                  reprocess = 1;
                }
              break;

            case DIRECTIVE_C_COMMENT:
              if (c == '*')
                state = DIRECTIVE_END_C_COMMENT;
              break;

            case DIRECTIVE_END_C_COMMENT:
              if (c == '/')
                {
                  OutputChar((char) c, output);
                  OutputFormat(output->scheme.comment_e, output);
                  /* resume the directive highlighting after the comment */
                  OutputFormat(output->scheme.directive_b, output);
                  state = DIRECTIVE;
                  emit = 0;
                }
              else if (c != '*')
                state = DIRECTIVE_C_COMMENT;
              break;

            case SINGLE_STRING:
              if (c == '\'')
                {
                  OutputChar((char) c, output);
                  OutputFormat(output->scheme.constant_e, output);
                  state = NORMAL;
                  emit = 0;
                }
              else if (c == '\\')
                state = SINGLE_STRING_ESC;
              break;

            case SINGLE_STRING_ESC:
              state = SINGLE_STRING;
              break;

            case DOUBLE_STRING:
              if (c == '\"')
                {
                  OutputChar((char) c, output);
                  OutputFormat(output->scheme.constant_e, output);
                  state = NORMAL;
                  emit = 0;
                }
              else if (c == '\\')
                state = DOUBLE_STRING_ESC;
              break;

            case DOUBLE_STRING_ESC:
              state = DOUBLE_STRING;
              break;

            case DIRECTIVE:
              if (c == '\n')
                {
                  OutputFormat(output->scheme.directive_e, output);
                  state = NORMAL_LINE_START;
                }
              else if (c == '/')
                {
                  state = DIRECTIVE_HALF_COMMENT;
                  emit = 0;
                }
              else if (c == '\\')
                state = DIRECTIVE_LINE_WRAP;
              break;

            case DIRECTIVE_LINE_WRAP:
              if (c == '\n' || !isspace(c))
                state = DIRECTIVE;
              break;

            case NUMBER_BEFORE_DOT:
              if (c == '.')
                state = NUMBER_AFTER_DOT;
              else if (!isxdigit(c) && c != 'x' && c != 'X' && c != 'l'
                       && c != 'L' && c != 'u' && c != 'U')
                {
                  OutputFormat(output->scheme.constant_e, output);
                  state = (c == '\n' ? NORMAL_LINE_START : NORMAL);
                }
              break;

            case NUMBER_AFTER_DOT:
              if (!isdigit(c))
                {
                  OutputFormat(output->scheme.constant_e, output);
                  state = (c == '\n' ? NORMAL_LINE_START : NORMAL);
                }
              break;

            case IDENTIFIER:
              if ((isalnum(c) || c == '_') && nident < MAXIDENT)
                {
                  ident[nident++] = (char) c;
                  ident[nident] = '\0';
                  emit = 0;
                }
              else
                {
                  /* identifier complete: highlight it if it is a keyword */
                  if (StringArrayFind(&keywords, ident))
                    {
                      OutputFormat(output->scheme.keyword_b, output);
                      OutputString(ident, output);
                      OutputFormat(output->scheme.keyword_e, output);
                    }
                  else
                    OutputString(ident, output);
                  state = (c == '\n' ? NORMAL_LINE_START : NORMAL);
                }
              break;

            default:
              fprintf(stderr, "We shouldn't be here!\n");
            }
        }
      while (reprocess);

      if (emit)
        OutputChar((char) c, output);
    }

  OutputFormat(output->scheme.code_e, output);
  fclose(fp);

  /* release the keyword list built by StringArrayRead */
  for (i = 0; i < keywords.num; ++i)
    free(keywords.strings[i]);
  free(keywords.strings);

  return 1;
}
/*********************************************************************
* OutputChar
*
* Writes a single character, escaping it if necessary
*/
/*
 * Parameters:
 *   c      - the character to write.
 *   output - destination stream, scheme and running column counter.
 *
 * '<', '>', '&' and '|' are written through the scheme's `specialchar'
 * format, a space as the scheme's `space' string, a newline as the
 * scheme's `newline' string, and a tab as enough `space' strings to
 * reach the next tab stop.  Anything else is written unchanged.
 * output->column is advanced for every column written and reset to 0
 * by a newline.
 */
void
OutputChar(char c, OutputFile *output)
{
  int tabwidth;

  switch (c)
    {
    case '<':
    case '>':
    case '&':
    case '|':
      /* NOTE: `specialchar' is used as the printf format itself, with the
         character as its only argument; it comes from the scheme file,
         which therefore has to be trusted. */
      fprintf(output->file, output->scheme.specialchar, c);
      ++output->column;
      break;

    case ' ':
      fprintf(output->file, "%s", output->scheme.space);
      ++output->column;
      break;

    case '\t':
      /* A tab width below 1 would divide by zero (or never reach a tab
         stop) in the test below, so treat it as a width of 1. */
      tabwidth = (output->scheme.tabwidth > 0) ? output->scheme.tabwidth : 1;
      do
        {
          fprintf(output->file, "%s", output->scheme.space);
          ++output->column;
        }
      while ((output->column % tabwidth) != 0);
      break;

    case '\n':
      fprintf(output->file, "%s", output->scheme.newline);
      output->column = 0;
      break;

    default:
      fputc(c, output->file);
      ++output->column;
    }
}
/*********************************************************************
* OutputString
*
* Writes a string, escaping characters as necessary
*/
/*
 * Passes every character of the NUL-terminated string `str', in order,
 * to OutputChar, which applies the scheme's substitutions.
 */
void
OutputString(const char *str, OutputFile *output)
{
  const char *cursor;

  for (cursor = str; *cursor != '\0'; ++cursor)
    OutputChar(*cursor, output);
}
/*********************************************************************
* OutputFormat
*
* Writes a format string
*/
/*
 * Writes `str' to output->file exactly as given: no character
 * substitution is applied and output->column is left untouched.
 */
void
OutputFormat(const char *str, OutputFile *output)
{
  FILE *sink = output->file;

  fprintf(sink, "%s", str);
}
/*********************************************************************
* StringArrayRead
*
* Reads a file of strings (one string per line) into the
* StringArray structure. The array is then sorted.
*/
/*
 * Parameters:
 *   array    - receives the strings; any previous contents are ignored
 *              (not freed).
 *   filename - file to read, located with FOpenOnPath.
 *
 * Returns 1 on success, with array->strings holding array->num
 * heap-allocated strings sorted with StringArrayCmp.  Returns 0 if the
 * file cannot be opened, in which case array->strings is NULL and
 * array->num is 0.  Running out of memory ends the program.
 *
 * Empty lines are skipped.  A line longer than MAXLINE - 1 characters
 * is stored as several consecutive entries.
 */
int
StringArrayRead(StringArray *array, const char *filename)
{
  const int blocksize = 8;  /* number of extra strings to add when resizing */
  int space = blocksize;    /* the number of strings that can now fit */
  char line[MAXLINE];       /* current line read from the FILE */
  FILE *fp;                 /* pointer to the token FILE */

  /* initialise the array to contain no elements */
  array->num = 0;
  array->strings = NULL;

  /* open the file of strings first, so nothing is allocated on failure */
  fp = FOpenOnPath(filename, "rt");
  if (fp == NULL)
    return 0;

  array->strings = malloc(space * sizeof *array->strings);
  if (array->strings == NULL)
    {
      fprintf(stderr, "Out of memory\n");
      exit(1);
    }

  /* Let fgets itself end the loop: testing feof() beforehand runs one
     extra pass with an empty buffer once the last line has been read. */
  while (fgets(line, sizeof line, fp) != NULL)
    {
      size_t len;
      char *copy;

      /* remove '\n' from the end, then skip lines that are empty */
      line[strcspn(line, "\n")] = '\0';
      len = strlen(line);
      if (len == 0)
        continue;

      /* resize the array so that it contains enough space */
      if (array->num >= space)
        {
          char **grown;

          space += blocksize;
          grown = realloc(array->strings, space * sizeof *grown);
          if (grown == NULL)
            {
              fprintf(stderr, "Out of memory\n");
              exit(1);
            }
          array->strings = grown;
        }

      /* add the string to the array */
      copy = malloc(len + 1);
      if (copy == NULL)
        {
          fprintf(stderr, "Out of memory\n");
          exit(1);
        }
      memcpy(copy, line, len + 1);
      array->strings[array->num] = copy;
      ++array->num;
    }
  fclose(fp);

  /* sort the array of strings */
  qsort(array->strings, array->num, sizeof(char *), StringArrayCmp);
  return 1;
}
/*********************************************************************
* StringArrayCmp
*
* Comparison function for StringArray sorting and searching.
*/
/*
 * `a' and `b' each point at a `char *' element, as qsort and bsearch
 * pass them; the two strings those elements refer to are compared with
 * strcmp and its result is returned.
 */
int
StringArrayCmp(const void *a, const void *b)
{
  const char *const *lhs = a;
  const char *const *rhs = b;

  return strcmp(*lhs, *rhs);
}
/*********************************************************************
* StringArrayFind
*
* Finds a given string in the array, returns NULL if not found.
*/
/*
 * Binary-searches array->strings for an entry equal to `match', using
 * StringArrayCmp (the array must be sorted with that same function).
 * Returns the stored string, or NULL when no entry matches.
 */
char *
StringArrayFind(StringArray *array, const char *match)
{
  char **hit;

  /* bsearch compares elements, so the key is the address of a `char *' */
  hit = bsearch(&match, array->strings, array->num,
                sizeof(char *), StringArrayCmp);

  return (hit != NULL) ? *hit : NULL;
}
/*********************************************************************
* SchemeRead
*
* Reads the highlighting scheme to be used from the given file.
*/
/*
 * Parameters:
 *   scheme   - receives the settings.
 *   filename - scheme file, located with FOpenOnPath.
 *
 * Returns 1 on success.  Returns 0, after printing a message to
 * stderr, if the file cannot be opened; `scheme' then holds the
 * built-in defaults (tab width 2, every string empty) rather than
 * being left uninitialised.
 *
 * Keys are matched without regard to case and unknown keys are
 * ignored.  String settings read from the file are heap-allocated and
 * owned by `scheme'; the defaults are string literals.
 */
int
SchemeRead(Scheme * scheme, const char *filename)
{
  FILE *fp;
  char *key;
  char *value;

  /* Set the defaults before anything can fail, so that the caller
     never ends up holding an uninitialised Scheme. */
  scheme->tabwidth = 2;
  scheme->keywords = "";
  scheme->space = "";
  scheme->newline = "";
  scheme->specialchar = "";
  scheme->code_b = "";
  scheme->code_e = "";
  scheme->comment_b = "";
  scheme->comment_e = "";
  scheme->directive_b = "";
  scheme->directive_e = "";
  scheme->constant_b = "";
  scheme->constant_e = "";
  scheme->keyword_b = "";
  scheme->keyword_e = "";

  fp = FOpenOnPath(filename, "rt");
  if (fp == NULL)
    {
      fprintf(stderr, "Unable to open highlight scheme file %s\n", filename);
      return 0;
    }

  while ((key = SchemeReadKey(fp)) != NULL)
    {
      value = SchemeReadValue(fp);
      if (strcasecmp(key, "tabwidth") == 0)
        {
          /* Accept only a usable width: tab expansion takes the column
             modulo this value, so 0 or a negative number must not get
             through.  32767 is the largest value every `int' can hold.
             Anything else keeps the current setting. */
          long width = strtol(value, NULL, 10);

          if (width >= 1 && width <= 32767)
            scheme->tabwidth = (int) width;
          free(value);
        }
      else if (strcasecmp(key, "keywords") == 0)
        scheme->keywords = value;
      else if (strcasecmp(key, "space") == 0)
        scheme->space = value;
      else if (strcasecmp(key, "newline") == 0)
        scheme->newline = value;
      else if (strcasecmp(key, "specialchar") == 0)
        scheme->specialchar = value;
      else if (strcasecmp(key, "code_b") == 0)
        scheme->code_b = value;
      else if (strcasecmp(key, "code_e") == 0)
        scheme->code_e = value;
      else if (strcasecmp(key, "comment_b") == 0)
        scheme->comment_b = value;
      else if (strcasecmp(key, "comment_e") == 0)
        scheme->comment_e = value;
      else if (strcasecmp(key, "directive_b") == 0)
        scheme->directive_b = value;
      else if (strcasecmp(key, "directive_e") == 0)
        scheme->directive_e = value;
      else if (strcasecmp(key, "constant_b") == 0)
        scheme->constant_b = value;
      else if (strcasecmp(key, "constant_e") == 0)
        scheme->constant_e = value;
      else if (strcasecmp(key, "keyword_b") == 0)
        scheme->keyword_b = value;
      else if (strcasecmp(key, "keyword_e") == 0)
        scheme->keyword_e = value;
      else
        free(value);            /* unknown key: discard its value */
      free(key);
    }

  fclose(fp);
  return 1;
}
/*********************************************************************
* SchemeReadKey
*
* Reads the next key name (for a key-value pair) from the file. A
* key name starts with a '.' at the start of the line and ends with
* a '=' character. The caller is responsible for freeing the string
* returned by this function.
*/
/*
 * Skips input up to and including the next '.' that is the first
 * character of a line (the current read position counts as the start
 * of a line), then collects every character up to, but not including,
 * the following '=' or the end of the file.
 *
 * Returns the key as a heap-allocated string that the caller frees, or
 * NULL if the end of the file is reached before a key starts.  Running
 * out of memory ends the program.
 */
char *
SchemeReadKey(FILE *fp)
{
  int ch;
  int last = '\n';          /* we begin as if a new line had just started */
  char *buf;
  int used = 0;
  int capacity = KEYCHUNK;

  /* look for a '.' that immediately follows a newline */
  for (;;)
    {
      ch = fgetc(fp);
      if (ch == EOF)
        return NULL;
      if (last == '\n' && ch == '.')
        break;
      last = ch;
    }

  buf = malloc(capacity);
  if (buf == NULL)
    {
      fprintf(stderr, "Out of memory\n");
      exit(1);
    }

  /* copy characters until '=' or end of file */
  while ((ch = fgetc(fp)) != EOF && ch != '=')
    {
      buf[used++] = ch;

      /* grow as soon as the buffer is full, so that there is always
         room left for the terminating NUL */
      if (used == capacity)
        {
          capacity += KEYCHUNK;
          buf = realloc(buf, capacity);
          if (buf == NULL)
            {
              fprintf(stderr, "Out of memory\n");
              exit(1);
            }
        }
    }

  buf[used] = '\0';
  return buf;
}
/*********************************************************************
* SchemeReadValue
*
* Reads the next value (for a key-value pair) from the file. A value
* follows on immediately after a key, and ends just before the
* beginning of the next key (denoted by a '.' at the start of the
* line). The caller is responsible for freeing the string returned by
* this function.
*/
/*
 * Collects characters from the current position until a line that
 * begins with '.' (that '.' is pushed back onto the stream) or the end
 * of the file.  A line whose first character is '#' is a comment and
 * is left out, newline included; the text that directly follows the
 * key is never treated as a comment.  The newline that precedes the
 * terminating '.' or the end of the file is not part of the value.
 *
 * Returns the value as a heap-allocated string that the caller frees.
 * Running out of memory ends the program.
 */
char *
SchemeReadValue(FILE *fp)
{
  int ch;
  int last = 0;             /* previous character; 0 before the first one */
  int skipping = 0;         /* non-zero while inside a comment line */
  char *buf;
  int used = 0;
  int capacity = VALUECHUNK;

  /* create new string to hold the value we read in */
  buf = malloc(capacity);
  if (buf == NULL)
    {
      fprintf(stderr, "Out of memory\n");
      exit(1);
    }

  while ((ch = fgetc(fp)) != EOF)
    {
      if (last == '\n')
        {
          if (ch == '.')
            {
              /* start of the next key: leave the '.' for SchemeReadKey */
              ungetc(ch, fp);
              break;
            }
          if (ch == '#')
            skipping = 1;
        }

      if (!skipping)
        {
          /* add the character to the string, resizing as necessary */
          buf[used++] = ch;
          if (used == capacity)
            {
              capacity += VALUECHUNK;
              buf = realloc(buf, capacity);
              if (buf == NULL)
                {
                  fprintf(stderr, "Out of memory\n");
                  exit(1);
                }
            }
        }

      /* a comment lasts until the end of its line */
      if (ch == '\n')
        skipping = 0;

      last = ch;
    }

  /* Whether we stopped at the next key or at end of file, the last
     character stored is a newline whenever the last character read was
     one; it is not part of the value, so drop it. */
  if (last == '\n')
    --used;

  buf[used] = '\0';
  return buf;
}
/*********************************************************************
* FOpenOnPath
*
* Like `fopen', but if the file cannot be opened under the name given
* it is also looked for along the PATH environment variable.
*/
/*
 * Parameters:
 *   fname - name of the file to open.
 *   mode  - fopen mode string.
 *
 * Returns the open stream, or NULL if the file could not be opened
 * under the name given or along PATH (or if memory for the candidate
 * path name could not be allocated).
 *
 * With HAVE_SEARCHPATH the lookup along PATH is left to searchpath().
 * Otherwise PATH is split at PATH_DELIM and `fname' is tried in each
 * non-empty directory in turn.
 */
FILE *
FOpenOnPath(const char *fname, const char *mode)
{
#if defined(HAVE_SEARCHPATH)
  FILE *fp;
  char *path;

  /* First, try file name as supplied */
  if ((fp = fopen(fname, mode)) != NULL)
    return fp;

  /* Search the PATH */
  path = searchpath(fname);
  if (path == NULL)
    return NULL;
  return fopen(path, mode);
#else
  FILE *fp;
  const char *env, *startp, *endp;
  char *buf;

  /* First, try file name as supplied */
  if ((fp = fopen(fname, mode)) != NULL)
    return fp;

  if ((env = getenv("PATH")) == NULL)
    return NULL;

  /* Worst case: a directory as long as the whole of PATH, one added
     DIRECTORY_DELIM, the file name and the terminating NUL. */
  buf = malloc(strlen(env) + strlen(fname) + 2);
  if (buf == NULL)
    return NULL;

  /* Search the PATH */
  startp = env;
  do
    {
      size_t dirlen;

      /* Get next directory's path */
      endp = strchr(startp, PATH_DELIM);
      dirlen = (endp != NULL) ? (size_t) (endp - startp) : strlen(startp);
      memcpy(buf, startp, dirlen);
      buf[dirlen] = '\0';
      if (endp != NULL)
        startp = endp + 1;

      /* empty PATH entries are skipped */
      if (dirlen > 0)
        {
          char *p = buf + dirlen;

          if (*(p - 1) != DIRECTORY_DELIM)
            *p++ = DIRECTORY_DELIM;
          strcpy(p, fname);     /* Append fname to path */
          if ((fp = fopen(buf, mode)) != NULL)
            break;              /* Found and opened! */
        }
    }
  while (endp != NULL);

  free(buf);
  return fp;                    /* still NULL if nothing was found */
#endif
}