sunlight
// CPPtoHTML.cpp : C/C++/Java to HTML converter.
//

#ifdef WIN32
#define WIN32_LEAN_AND_MEAN     // Exclude rarely-used stuff from Windows headers
#include <windows.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#ifndef MAX_PATH
#ifdef _MAX_PATH
#define MAX_PATH    _MAX_PATH
#else
#define MAX_PATH    260
#endif
#endif

// Output format selector: when true, FormatCode() emits an HTML page; when
// false it emits bracket-tag markup ("[code]...[/code]") instead.
// main() sets it from the -h (true) and -p (false) command-line options.
bool    g_bHTML = true;

// Keyword list.
// Since a binary search is made of this list (bsearch with a strcmp-based
// comparator), it must be sorted in strcmp order, i.e. by character code:
// '#' sorts before '_', which sorts before the lowercase letters, and digits
// are compared character by character (so "__int16" precedes "__int8").
const char *szKeywords[] = {
"#define",
"#elif",
"#else",
"#endif",
"#if",
"#ifdef",
"#ifndef",
"#include",
"#pragma",
"__asm",
"__based",
"__cdecl",
"__declspec",
"__except",
"__fastcall",
"__finally",
"__inline",
"__int16",
"__int32",
"__int64",
"__int8",
"__leave",
"__multiple_inheritance",
"__single_inheritance",
"__stdcall",
"__try",
"__uuidof",
"__virtual_inheritance",
"asm",
"auto",
"bad_cast",
"bad_typeid",
"bool",
"boolean",
"break",
"case",
"catch",
"char",
"class",
"const",
"const_cast",
"continue",
"default",
"delete",
"dllexport",
"dllimport",
"do",
"double",
"dynamic_cast",
"else",
"enum",
"except",
"explicit",
"extern",
"false",
"finally",
"float",
"for",
"friend",
"goto",
"if",
"implements",
"import",
"inline",
"int",
"interface",
"long",
"mutable",
"naked",
"namespace",
"new",
"nothrow",
"operator",
"private",
"property",
"protected",
"public",
"register",
"reinterpret_cast",
"return",
"selectany",
"short",
"signed",
"sizeof",
"static",
"static_cast",
"struct",
"switch",
"template",
"this",
"thread",
"throw",
"true",
"try",
"type_info",
"typedef",
"typeid",
"typename",
"union",
"unsigned",
"using",
"uuid",
"virtual",
"void",
"volatile",
"while",
};

// CSS stylesheet used for LINK element of the generated HTML page.
// Defaults to "default.css"; main() may overwrite it from the registry
// (WIN32 builds only) or from the -s command-line option.
char    szStylesheet[MAX_PATH] = "default.css";

// Comparison callback for bsearch over an array of C strings.
//  p1 - the search key: the address of the first character of a
//       NUL-terminated string.
//  p2 - the candidate array element: the address of a 'const char *' entry.
// Returns the strcmp ordering of the key relative to the entry's string.
int BSearchStrCmp(const void *p1, const void *p2)
{
    const char *szKey = (const char *)p1;
    const char *const *ppszEntry = (const char *const *)p2;

    return strcmp(szKey, *ppszEntry);
}

// Converts the source text read from fIn into highlighted markup on fOut.
//
//  szPath - path of the input file. Only its file name and extension are
//           used, as the <title> of the HTML page.
//  fIn    - stream the source text is read from, in chunks of at most 1023
//           characters per fgets call.
//  fOut   - stream that receives the generated markup.
//
// When g_bHTML is true a complete HTML page is written (linking szStylesheet
// and using <span class='Comment|String|Number|Keyword'> elements); otherwise
// bracket-tag markup ([code], [green], [blue], [purple]) is written.
// Nothing is returned and stream errors are not checked here.
// The function keeps its working buffers in static storage, so it is not
// reentrant.
void FormatCode(const char *szPath, FILE *fIn, FILE *fOut)
{
static  char    szLine[1024], szWord[256];

    // Longest token that fits in szWord together with its terminator.
    const int nMaxWord = (int)sizeof(szWord) - 1;

    // Lexer state. It is kept across lines, so block comments (and unterminated
    // strings or character literals) carry over to the following line.
    bool bInChar = false, bInComment = false, bInSLComment = false, bInString = false;

    // Get the filename base from the path.
static  char    szDrive[_MAX_DRIVE], szDir[_MAX_DIR], szName[_MAX_FNAME + _MAX_EXT], szExt[_MAX_EXT];
    _splitpath(szPath, szDrive, szDir, szName, szExt);
    strcat(szName, szExt);

    // HTML header.
    if (g_bHTML)
        fprintf(fOut, "<html>\n"
            "<head>\n"
            "<link rel='stylesheet' href='%s'>\n"
            "<title>%s</title>\n"
            "</head>\n"
            "<body>\n"
            "<pre class='Code'>", szStylesheet, szName);
    else
        fprintf(fOut, "[code][size=2]");

    while (fgets(szLine, sizeof(szLine), fIn) != NULL)
    {
        // j is declared here (not in the for-init) because it is read after
        // the token-scanning loops below have finished.
        int i, j, n, nCol = 0;
        for (i = 0; szLine[i]; i++)
        {
            switch (szLine[i])
            {
            case ' ':
                fputc(' ', fOut);
                nCol++;
                break;

            case '<':
                if (g_bHTML)
                    fprintf(fOut, "&lt;");
                else
                    fputc('<', fOut);
                nCol++;
                break;

            case '>':
                if (g_bHTML)
                    fprintf(fOut, "&gt;");
                else
                    fputc('>', fOut);
                nCol++;
                break;

            case '&':
                if (g_bHTML)
                    fprintf(fOut, "&amp;");
                else
                    fputc('&', fOut);
                nCol++;
                break;

            case '\t':
                // Tab stops every 4 characters.
                n = 4 - nCol % 4;
                nCol += n;
                for (; n > 0; n--)
                    fputc(' ', fOut);
                break;

            case '\n':
                fputc('\n', fOut);
                break;

            case '\\':
                // Double-slashes are quietly removed, so that it does not
                // affect the processing of strings (i.e. "\\" versus "\"").
                // Only the buffer is patched; the backslash is still output.
                if ((i > 0) && (szLine[i - 1] == '\\'))
                    szLine[i] = ' ';

                fputc('\\', fOut);
                nCol++;
                break;

            case '/':
                if (!bInString && !bInComment && !bInSLComment && !bInChar)
                {
                    // Look for block or single-line comment opening.
                    // szLine[i + 1] is at worst the terminating NUL.
                    if (szLine[i + 1] == '/')
                    {
                        bInSLComment = true;
                        if (g_bHTML)
                            fprintf(fOut, "<span class='Comment'>");
                        else
                            fprintf(fOut, "[green]");
                    }
                    else if (szLine[i + 1] == '*')
                    {
                        if (g_bHTML)
                            fprintf(fOut, "<span class='Comment'>");
                        else
                            fprintf(fOut, "[green]");
                        bInComment = true;
                    }
                }
                else if (bInComment)
                {
                    // Look for block comment closing
                    if ((i > 0) && (szLine[i - 1] == '*'))
                    {
                        // The closing slash belongs inside the comment markup
                        // in both output formats.
                        if (g_bHTML)
                            fprintf(fOut, "/</span>");
                        else
                            fprintf(fOut, "/[/green]");
                        nCol++;
                        bInComment = false;
                        break;
                    }
                }

                fputc('/', fOut);
                nCol++;
                break;

            case '\'':
                if (!bInString && !bInComment && !bInSLComment)
                {
                    // Start or end of a character.
                    if ((i > 0) && (szLine[i - 1] == '\\'))
                        fprintf(fOut, "'");     // escaped quote: state unchanged
                    else
                    {
                        bInChar = !bInChar;
                        if (bInChar)
                        {
                            if (g_bHTML)
                                fprintf(fOut, "<span class='String'>'");
                            else
                                fputc('\'', fOut);
                        }
                        else
                        {
                            if (g_bHTML)
                                fprintf(fOut, "'</span>");
                            else
                                fputc('\'', fOut);
                        }
                    }
                }
                else
                    fprintf(fOut, "'");
                nCol++;
                break;

            case '\"':
                if (!bInChar && !bInComment && !bInSLComment)
                {
                    // Start or end of a string.
                    if ((i > 0) && (szLine[i - 1] == '\\'))
                    {
                        // Escaped quote: state unchanged. The entity is only
                        // valid in HTML output.
                        if (g_bHTML)
                            fprintf(fOut, "&quot;");
                        else
                            fputc('\"', fOut);
                    }
                    else
                    {
                        bInString = !bInString;
                        if (bInString)
                        {
                            if (g_bHTML)
                                fprintf(fOut, "<span class='String'>&quot;");
                            else
                                fputc('\"', fOut);
                        }
                        else
                        {
                            if (g_bHTML)
                                fprintf(fOut, "&quot;</span>");
                            else
                                fputc('\"', fOut);
                        }
                    }
                }
                else
                {
                    if (g_bHTML)
                        fprintf(fOut, "&quot;");
                    else
                        fputc('\"', fOut);
                }
                nCol++;
                break;

            default:
                if (!isascii(szLine[i]))
                {
                    // Non-ASCII bytes become numeric character references in HTML.
                    if (g_bHTML)
                        fprintf(fOut, "&#x%02X;", (int)(unsigned char)szLine[i]);
                    else
                        fputc(szLine[i], fOut);
                    nCol++;
                    break;
                }
                if (!bInChar && !bInComment && !bInSLComment && !bInString)
                {
                    // The <ctype.h> functions require an unsigned char value,
                    // hence the casts on every character that is classified.
                    if (isdigit((unsigned char)szLine[i]))
                    {
                        // Start of a number (all numbers start with a digit,
                        //  and contain alphanumeric characters or periods).
                        // The copy is capped at nMaxWord characters; anything
                        // beyond that is picked up by the next loop iteration.
                        for (j = 0; (j < nMaxWord) &&
                            (isalnum((unsigned char)szLine[i + j]) || (szLine[i + j] == '.')); j++)
                            szWord[j] = szLine[i + j];
                        szWord[j] = '\0';
                        i += j - 1;
                        nCol += j;
                        if (g_bHTML)
                            fprintf(fOut, "<span class='Number'>%s</span>", szWord);
                        else
                            fprintf(fOut, "[purple]%s[/purple]", szWord);
                        break;
                    }
                    if (isalpha((unsigned char)szLine[i]) || (szLine[i] == '#'))
                    {
                        // Start of an identifier. Identifiers start with a letter or #,
                        //  and contain alphanumeric characters, # or _.
                        // Capped at nMaxWord characters like numbers above.
                        for (j = 0; (j < nMaxWord) &&
                            (isalnum((unsigned char)szLine[i + j]) || (szLine[i + j] == '#') || (szLine[i + j] == '_')); j++)
                            szWord[j] = szLine[i + j];
                        szWord[j] = '\0';
                        i += j - 1;
                        nCol += j;

                        // Look for the keyword in our dictionary.
                        // Use bsearch for a fast binary search through the keyword array.
                        // Only whether a match exists matters, not which entry.
                        void *pMatch = bsearch(szWord, szKeywords,
                            (sizeof(szKeywords) / sizeof(szKeywords[0])),
                            sizeof(szKeywords[0]),
                            BSearchStrCmp);
                        if (pMatch != NULL)
                        {
                            if (g_bHTML)
                                fprintf(fOut, "<span class='Keyword'>%s</span>", szWord);
                            else
                                fprintf(fOut, "[blue]%s[/blue]", szWord);
                        }
                        else
                            fprintf(fOut, "%s", szWord);
                        break;
                    }
                }
                fputc(szLine[i], fOut);
                nCol++;
                break;
            }
        }
        // Single-line comments end at the end of the line.
        if (bInSLComment)
        {
            bInSLComment = false;
            if (g_bHTML)
                fprintf(fOut, "</span>");
            else
                fprintf(fOut, "[/green]");
        }
    }
    // Close any <span> tags left open by unterminated input.
    if ((bInChar || bInString) && g_bHTML)
        fprintf(fOut, "</span>");
    if (bInComment)
    {
        if (g_bHTML)
            fprintf(fOut, "</span>");
        else
            fprintf(fOut, "[/green]");
    }
    // HTML footer.
    if (g_bHTML)
        fprintf(fOut, "</pre>\n</body>\n</html>\n");
    else
        fprintf(fOut, "[/size][/code]");
}

#ifdef WIN32
// Reads the "Default Stylesheet" string value of the SOFTWARE\cpptohtml key
// under hRoot and, when it exists, is of type REG_SZ and fits, copies it
// into szStylesheet. Any failure leaves szStylesheet untouched.
static void LoadStylesheetFromRegistry(HKEY hRoot)
{
    HKEY    hKey;
    DWORD   dwType, cbData;
    char    szValue[MAX_PATH + 1];  // one spare byte for a forced terminator

    if (RegOpenKeyEx(hRoot, "SOFTWARE\\cpptohtml", 0, KEY_QUERY_VALUE, &hKey) != ERROR_SUCCESS)
        return;

    cbData = MAX_PATH;
    if (RegQueryValueEx(hKey, "Default Stylesheet", NULL, &dwType, (LPBYTE)szValue, &cbData) == ERROR_SUCCESS)
    {
        if (dwType == REG_SZ)
        {
            // Registry strings are not guaranteed to be stored with a
            // terminating NUL, so add one before treating it as a C string.
            szValue[cbData] = '\0';
            if (strlen(szValue) < sizeof(szStylesheet))
                strcpy(szStylesheet, szValue);
        }
    }
    RegCloseKey(hKey);
}
#endif

// Program entry point.
//
// Usage: cpptohtml [-s stylesheet] [-p] [-h] infile [infile...]
//  -s stylesheet  sets the stylesheet referenced by the HTML output
//  -p             switches to bracket-tag output, written to "<infile>.txt"
//  -h             switches to HTML output, written to "<infile>.html"
// Options may be introduced by '-' or '/' and apply to the input files that
// follow them on the command line.
//
// Returns 1 when no arguments are given or an output file cannot be created,
// 0 otherwise. Input files that cannot be opened are reported and skipped.
int main(int argc, char* argv[])
{
    printf("C/C++/Java to HTML Converter\n");
    printf(" Copyright (C) David McCabe, 2000.\n\n");

    if (argc < 2)
    {
        printf("cpptohtml: Usage: cpptohtml [-s stylesheet] infile [infile...]\n");
        return 1;
    }

    int     i;
    char    szOutFile[MAX_PATH];
    FILE    *fIn, *fOut;

#ifdef WIN32
    // On Windows, we can look up the default stylesheet in the registry.
    // The per-user setting is read last so that it overrides the machine-wide one.
    LoadStylesheetFromRegistry(HKEY_LOCAL_MACHINE);
    LoadStylesheetFromRegistry(HKEY_CURRENT_USER);
#endif

    for (i = 1; i < argc; i++)
    {
        if ((argv[i][0] == '-') || (argv[i][0] == '/'))
        {
            switch (argv[i][1])
            {
            case 's':
                if (i < (argc - 1))
                {
                    i++;
                    // szStylesheet is a fixed-size buffer: refuse anything
                    // that would not fit instead of overrunning it.
                    if (strlen(argv[i]) < sizeof(szStylesheet))
                        strcpy(szStylesheet, argv[i]);
                    else
                        printf("cpptohtml: stylesheet path is too long.\n");
                }
                else
                {
                    printf("cpptohtml: -s option requires a stylesheet.\n");
                }
                break;

            case 'p':   // ProgrammersHeaven-style coding
                g_bHTML = false;
                break;

            case 'h':   // HTML coding
                g_bHTML = true;
                break;

            default:
                printf("cpptohtml: unknown argument '%c'.\n", argv[i][1]);
                break;
            }
        }
        else
        {
            // The output name is the input name plus a format-specific suffix.
            const char *szSuffix = g_bHTML ? ".html" : ".txt";

            if (strlen(argv[i]) + strlen(szSuffix) >= sizeof(szOutFile))
            {
                printf("cpptohtml: Path too long: %s\n", argv[i]);
                continue;
            }

            fIn = fopen(argv[i], "r");
            if (!fIn)
            {
                printf("cpptohtml: Cannot open %s\n", argv[i]);
                continue;
            }

            strcpy(szOutFile, argv[i]);
            strcat(szOutFile, szSuffix);

            fOut = fopen(szOutFile, "w");
            if (!fOut)
            {
                printf("cpptohtml: Cannot create %s\n", szOutFile);
                fclose(fIn);
                return 1;
            }

            FormatCode(argv[i], fIn, fOut);

            fclose(fIn);
            // fclose flushes buffered output, so this is where a failed
            // write (e.g. disk full) becomes visible.
            if (fclose(fOut) != 0)
                printf("cpptohtml: Error writing %s\n", szOutFile);
        }
    }
    return 0;
}

 

Copyright © David McCabe, 1998 - 2001. All rights reserved.

You will need to download and install the m-math control to display any equations on this Web site. Without this control, you will not see most of the equations. Please do not e-mail me asking why the equations do not display!

[an error occurred while processing this directive]