%{
/*
 * George Ruban 9-14-95 gmn@cs.bu.edu
 *
 * 2html.lex: Lex program for HTML production.
 * Parses input, puts hotlinks around href'erences, mailto:'s around
 * email addresses, coats w/ begin/end html's.
 *
 * Remember (1) to put paren's around all expressions
 *          (2) to avoid in-expression spaces.
 *
 * To make:
 *     lex 2html.lex
 *     cc -o 2html lex.yy.c -ll
 *
 * To run, for ASCII tab/space formatted text:
 *     2html < inputfile > outputfile.html
 * or
 *     2html inputfile outputfile.html
 *
 * 9-15-95:  Modified to give a Title, and <PRE> tags.
 * 9-25-95:  Modified after Jonathan Sarito "turn --- and ==='s three long
 *           or longer into <HR>'s and \n\n or longer into <P>'s."
 * 10-25-95: Modified to insert time of creation into document, separate
 *           printf's rather than string concatenation - cc can't take it,
 *           treat \n\r as 1 newline, not 2.
 *           Option to not format text failed.
 * 10-26-95: Modified to optionally take input & output on command line,
 *           since VMS/VAX allegedly doesn't have easy i/o redirection.
 * 10-30-95: Modified to include news: around newsgroups.
 * 3-20-97:  Modified to replace <, >, &, with &lt; &gt;, &amp;.
 *
 * NOTE(review): the copy of this file that was reviewed had every <...>
 * token stripped (header names, HTML tags, entities).  They have been
 * restored from context; confirm the exact tags against a pristine copy.
 */
#include <stdio.h>   /* for printf() */
#include <stdlib.h>  /* for getenv(), exit() */
#include <string.h>  /* for strlen(), strncmp(), strcspn() */
#include <time.h>    /* for time(), ctime() */

static int  is_newsgroup(const char *name);
static void put_escaped(FILE *out, const char *text);

/*
 * Pattern definitions below:
 *   alpha/num/sym  single letter, digit, or one of  _ - ~ % + ' !
 *   ws, nl         blank (space/tab), line terminator (\n or \r)
 *   para           a run of two or more line terminators
 *   dash, dashlin  a line made only of -, = or _ (plus blanks)
 *   word           dot-separated runs of alphanum characters
 *   server         word with optional :port
 *   path           zero or more /word segments, optional trailing /
 *   email          word@server
 *   href           scheme://server followed by path
 *   news           3-10 letters followed by one or more .name parts
 */
%}

alpha    ([a-zA-Z])
sym      ([\_\-\~\%\+\'\!])
num      ([0-9])
ws       ([ \t])
nl       ([\n\r])
para     ((\n\n+)|(\r\r+)|(\n\r{nl}+)|(\r\n{nl}+))
alphanum ({alpha}|{num}|{sym})
dash     ([-=_])
dashlin  (({nl}|{para}){ws}*{dash}+{ws}*({nl}|{para}))
word     ({alphanum}+(\.{alphanum}+)*)
server   ({word}(:{num}+)?)
path     ((\/{word})*\/?)
email    ({word}\@{server})
href     ({alpha}+:\/\/{server}{path})
news     ({alpha}{3,10}(\.[A-Za-z\-\_]+)+)

%%

{news}    {
              /* Only names under a known hierarchy become news: links;
                 any other dotted word is passed through untouched. */
              if (is_newsgroup(yytext)) {
                  fprintf(yyout, "<A HREF=\"news:%s\">%s</A>", yytext, yytext);
              } else {
                  ECHO;
              }
          }

{para}    {
              /* Blank line(s) become a paragraph break. */
              fprintf(yyout, "\n<P>\n");
          }

{dashlin} {
              /* A line of dashes becomes a horizontal rule. */
              fprintf(yyout, "\n<HR>\n");
          }

{href}    {
              fprintf(yyout, "<A HREF=\"%s\">%s</A>", yytext, yytext);
          }

{email}   {
              fprintf(yyout, "<A HREF=\"mailto:%s\">%s</A>", yytext, yytext);
          }

\<        { fputs("&lt;", yyout); }

\>        { fputs("&gt;", yyout); }

\&        { fputs("&amp;", yyout); }

.|\n      { ECHO; }

%%

/*
 * Return nonzero when `name` starts with one of the newsgroup hierarchy
 * prefixes this program links, zero otherwise.
 */
static int is_newsgroup(const char *name)
{
    static const char *const hierarchies[] = {
        "rec.", "news.", "alt.", "misc.", "soc.", "talk.",
        "comp.", "clari.", "humanities.", "biz."
    };
    size_t i;

    for (i = 0; i < sizeof hierarchies / sizeof hierarchies[0]; i++) {
        if (strncmp(name, hierarchies[i], strlen(hierarchies[i])) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Write `text` to `out`, replacing <, > and & with their HTML entities.
 * Used for the title line, which is emitted by main() and therefore never
 * passes through the scanner rules that do the same escaping.
 */
static void put_escaped(FILE *out, const char *text)
{
    for (; *text != '\0'; text++) {
        switch (*text) {
        case '<':
            fputs("&lt;", out);
            break;
        case '>':
            fputs("&gt;", out);
            break;
        case '&':
            fputs("&amp;", out);
            break;
        default:
            putc(*text, out);
            break;
        }
    }
}

/*
 * Convert a text file to HTML.
 *
 * With no arguments, reads stdin and writes stdout.  With exactly two
 * arguments, argv[1] is the input file and argv[2] the output file.  Any
 * other argument count prints a usage message and exits with status 1.
 *
 * The first input line (at most sizeof title - 1 characters) is used as
 * the document title and heading when it is not blank; everything after
 * it is run through the scanner.  Returns 0 on success, 1 if the output
 * could not be written.
 */
int main(int argc, char *argv[])
{
    char title[255];
    time_t now;          /* time of running */
    const char *stamp;
    int status = 0;

    /* Some lex implementations (flex) leave yyin/yyout NULL until yylex()
       first runs, but both are used below before that call. */
    yyin = stdin;
    yyout = stdout;

    if (argc > 1) {      /* arguments are considered to be in and out file */
        if (argc != 3) {
            fprintf(stderr, "Usage: %s [inputfile outputfile]\n", argv[0]);
            fprintf(stderr, "Converts a text file to HTML, hotlinks links.\n");
            fprintf(stderr, "If files unspecified, reads stdin, writes stdout.\n");
            fprintf(stderr, "George Ruban 03-20-97, gmn@cs.bu.edu.\n");
            exit(1);
        }
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            fprintf(stderr, "Error: Could not open \"%s\" for reading.\n", argv[1]);
            exit(1);
        }
        yyout = fopen(argv[2], "w");
        if (yyout == NULL) {
            fprintf(stderr, "Error: Could not open \"%s\" for writing.\n", argv[2]);
            exit(1);
        }
    }

    fprintf(yyout, "<HTML>\n<HEAD>\n");

    /* Empty input leaves the buffer untouched, so treat it as no title. */
    if (fgets(title, sizeof title, yyin) == NULL) {
        title[0] = '\0';
    }
    title[strcspn(title, "\r\n")] = '\0';   /* drop the line terminator */

    if (title[0] != '\0') {                 /* title more than just a blank line */
        fputs("<TITLE>", yyout);
        put_escaped(yyout, title);
        fputs("</TITLE>\n", yyout);
        fputs("</HEAD>\n<BODY>\n<H1>", yyout);
        put_escaped(yyout, title);
        fputs("</H1>\n", yyout);
    } else {
        fprintf(yyout, "</HEAD>\n<BODY>\n");
    }

    fprintf(yyout, "\n<PRE>\n");
    yylex();                                /* do the parsing */
    fprintf(yyout, "\n</PRE>\n");
    fprintf(yyout, "<HR>\n");

    time(&now);
    stamp = ctime(&now);                    /* ends with its own newline */
    fprintf(yyout, "<ADDRESS>\n");
    fprintf(yyout, "Document created %s \n", stamp != NULL ? stamp : "(time unknown)\n");
    fprintf(yyout, "</ADDRESS>\n</BODY>\n</HTML>\n");

    /* Buffered write errors only show up on flush/close. */
    if (fflush(yyout) != 0 || ferror(yyout)) {
        fprintf(stderr, "Error: Could not write output.\n");
        status = 1;
    }
    if (argc == 3) {
        fclose(yyin);
        if (fclose(yyout) != 0 && status == 0) {
            fprintf(stderr, "Error: Could not write \"%s\".\n", argv[2]);
            status = 1;
        }
    }
    return status;
}