/* overx.c - Merge lines with overstrikes in text files              */
/* Copyright (c) 1998 by Ira E McDonald (High North Inc)             */

/* NOTE -- Please read full description at 'overx_synopsis[]'        */
/*         later in this program source file.                        */

/* NOTE -- This program uses strictly ISO 9899:1990 (ISO C)          */
/*         compliant header files, functions, and statements.        */
/*         However, this program declares BOTH strictly ISO C        */
/*         compliant fully specified function prototypes AND         */
/*         traditional C function declarations for greater           */
/*         portability to older systems (eg, SunOS).                 */

/* NOTE -- This program compiles warning free in strict ISO C.       */

/* Standard Header Files */

#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Standard Logicals for C (Pascal Style) */

/* NOTE -- The specific intent behind the use of the 'Standard       */
/*         Logicals for C' in a program is to ensure:  1) the        */
/*         appearance of an equal sign ('=') ALWAYS indicates        */
/*         an assignment operation;  2) the appearance of an         */
/*         ampersand ('&') ALWAYS indicates EITHER a bitwise         */
/*         AND or a Reference operation;  and 3) the appearance      */
/*         of a vertical bar ('|') ALWAYS indicates a bitwise        */
/*         OR operation.                                             */

/* NOTE -- For strict compliance with ISO 9899 AM1:1994 (ISO C)      */
/*         the following logicals should be in lowercase,            */
/*         as per the header file 'iso646.h'.                        */

#ifdef __STDC__
#undef LT
#undef GT
#undef LE
#undef GE
#undef EQ
#undef NE
#undef AND
#undef OR
#undef NOT
#undef TRUE
#undef FALSE
#endif

#define LT      <
#define GT      >
#define LE      <=
#define GE      >=
#define EQ      ==
#define NE      !=
#define AND     &&
#define OR      ||
#define NOT     !
#define TRUE    1
#define FALSE   0

/* Standard Simple Types for C */

/* NOTE -- The specific intent behind the use of the 'Standard       */
/*         Simple Types for C' is to ensure:  1) the maximal         */
/*         clarity of intended program behavior;  and 2) the use     */
/*         of a single token for any simple type declaration.        */

/* NOTE -- For strict compliance with the 'Clean C' dialect,         */
/*         compatible with both ISO C and ISO C++ (draft) and        */
/*         described in 'C: A Reference Manual' by Harbison          */
/*         and Steele (4th Edition, January 1995), the simple        */
/*         type 'boolean' should be respelled 'bool'.                */

#ifdef __STDC__
#undef uchar
#undef ushort
#undef uint
#undef ulong
#undef boolean
#endif

#define uchar   unsigned char
#define ushort  unsigned short
#define uint    unsigned int
#define ulong   unsigned long
#define boolean unsigned char

/*********************************************************************/
/* Global constants                                                  */
/*********************************************************************/

#define OVERX_READ_MAX  2048            /* Buffer Read Max Length    */
#define OVERX_FRAG_MAX  50              /* Fragment Max              */
#define OVERX_NAME_MAX  255             /* Input File Name Max Length*/
#define OVERX_MSG_MAX   (OVERX_NAME_MAX + 128)
                                        /* Error Message Max Length  */

/*********************************************************************/
/* Global variables                                                  */
/*********************************************************************/

char *overx_synopsis[] = {              /* 'overx_synopsis'          */
    "Usage: overx [-cdgv] filename ...",
    "",
    " filename = < file_root.file_ext >",
    " -c Replace each control char with a space (' ')",
    " else, replace control char with a dot ('.')",
    " -d Replace each DEL char with a space (' ')",
    " else, replace DEL char with a dot ('.')",
    " -g Replace each graphic char with a space (' ')",
    " else, replace graphic char with a dot ('.')",
    " -v Verbose log output mode",
    " (HINT - redirect verbose log output to a file)",
    "",
    "Ex: overx -v myfile.txt >myfile.log",
    " (merge 'myfile.txt',",
    " verbose log to file 'myfile.log',",
    " new merged version to 'myfile.out')",
    "",
    "Note: The 'overx' result is written to 'file_root.out'",
    "",
    "'overx' is a utility to merge overstrike lines in text files",
    "Copyright (c) 1998 by Ira E McDonald (High North Inc)",
    NULL
};                                      /* 'overx_synopsis'          */

char overx_ibuffer[OVERX_READ_MAX+2];   /* Input File Buffer         */

/* The output line can grow by a carriage return (OSMSDOS only), a   */
/* linefeed, and an inserted formfeed beyond OVERX_READ_MAX bytes.   */
char overx_obuffer[OVERX_READ_MAX+4];   /* Output File Buffer        */

typedef struct frag {
    char *ptr;                          /* Fragment string pointer   */
    int len;                            /* Fragment string length    */
} FRAG;

FRAG overx_frag[OVERX_FRAG_MAX];        /* Fragment info table       */

/*********************************************************************/
/* Global function declarations                                      */
/*********************************************************************/

#ifdef __STDC__
extern int main (                       /* Mainline                  */
    int argc,                           /* Argument Count            */
    char *argv[]);                      /* Argument Vector           */
#else
extern int main ();                     /* Mainline                  */
#endif

#ifdef __STDC__
static void overx_show_synopsis (       /* Display Program Synopsis  */
    void);                              /* No Arguments              */
#else
static void overx_show_synopsis ();     /* Display Program Synopsis  */
#endif

#ifdef __STDC__
static void overx_make_output_name (    /* Build Output File Name    */
    char *ofn,                          /* Output File Name (result) */
    char *ifn);                         /* Input File Name           */
#else
static void overx_make_output_name ();  /* Build Output File Name    */
#endif

#ifdef __STDC__
static void overx_scan_file (           /* Scan File                 */
    char *ifn,                          /* Input File Name           */
    boolean cf,                         /* Control To Space Flag     */
    boolean df,                         /* DEL To Space Flag         */
    boolean gf,                         /* Graphic To Space Flag     */
    boolean vf);                        /* Verbose Flag              */
#else
static void overx_scan_file ();         /* Scan File                 */
#endif

#ifdef __STDC__
static void overx_error_exit (          /* Log Error and Exit        */
    char *es);                          /* Error Message String      */
#else
static void overx_error_exit ();        /* Log Error and Exit        */
#endif

/*********************************************************************/
/* Function: main()                                                  */
/*                                                                   */
/* Parses the leading option arguments (each starting with '-'),     */
/* then runs overx_scan_file() on every remaining argument.          */
/* Shows the synopsis and exits when no filename is supplied.        */
/* Returns 0 after all files have been processed.                    */
/*********************************************************************/

#ifdef __STDC__
extern int main (                       /* Mainline                  */
    int argc,                           /* Argument Count            */
    char *argv[])                       /* Argument Vector           */
#else
extern int main (                       /* Mainline                  */
    argc,                               /* Argument Count            */
    argv)                               /* Argument Vector           */
    int argc;                           /* Argument Count            */
    char *argv[];                       /* Argument Vector           */
#endif
{                                       /* 'main'                    */
    int ai;                             /* Argument Index            */
    char *ap;                           /* Argument Pointer          */
    boolean cf = FALSE;                 /* Control Flag              */
    boolean df = FALSE;                 /* DEL Flag                  */
    boolean gf = FALSE;                 /* Graphic Flag              */
    boolean vf = FALSE;                 /* Verbose Flag              */

    /* Check for missing input arguments */
    if (argc LT 2)
        overx_show_synopsis ();

    /* Check for options */
    for (ai = 1; ai LT argc; ai++) {

        /* Get pointer to next input argument */
        ap = argv[ai];

        /* Check for leading dash (option token) */
        if (*ap NE '-')
            break;

        /* Parse zero or more options (case insensitive);            */
        /* unrecognized option letters are ignored                   */
        for (ap++; *ap; ap++) {
            switch (*ap) {
            case 'c':                   /* Control Chars to Spaces   */
            case 'C':
                cf = TRUE;
                break;
            case 'd':                   /* DEL Chars to Spaces       */
            case 'D':
                df = TRUE;
                break;
            case 'g':                   /* Graphic Chars to Spaces   */
            case 'G':
                gf = TRUE;
                break;
            case 'v':                   /* Verbose                   */
            case 'V':
                vf = TRUE;
                break;
            default:
                break;
            }
        }
    }

    /* Check for missing input filename(s) */
    if (ai GE argc)
        overx_show_synopsis ();

    /* Process all input files */
    for (; ai LT argc; ai++) {

        /* Get next input filename */
        ap = argv[ai];
        if (strlen (ap) EQ 0)
            overx_error_exit ("Missing input filename");

        /* Process next input file */
        overx_scan_file (ap, cf, df, gf, vf);
    }

    (void) printf ("overx: Normal termination\n");
    return (0);
}                                       /* 'main'                    */

/*********************************************************************/
/* Function: overx_show_synopsis()                                   */
/*                                                                   */
/* Prints every line of 'overx_synopsis' to standard output and      */
/* terminates the program with exit status 0.  Does not return.      */
/*********************************************************************/

#ifdef __STDC__
static void overx_show_synopsis (       /* Display Program Synopsis  */
    void)                               /* No Arguments              */
#else
static void overx_show_synopsis (       /* Display Program Synopsis  */
    )                                   /* No Arguments              */
#endif
{                                       /* 'overx_show_synopsis'     */
    int si;                             /* Synopsis Index            */

    /* Display program synopsis (table is NULL terminated) */
    for (si = 0; overx_synopsis[si]; si++)
        (void) printf ("%s\n", overx_synopsis[si]);

    exit (0);
}                                       /* 'overx_show_synopsis'     */

/*********************************************************************/
/* Function: overx_make_output_name()                                */
/*                                                                   */
/* Copies 'ifn' to 'ofn' and replaces everything from the first dot  */
/* of the file name component (the part after the last directory     */
/* separator) with ".out".  When that component has no dot, ".out"   */
/* is appended.  Directory components are left untouched, so         */
/* "./a.txt" yields "./a.out".                                       */
/*                                                                   */
/* The caller must supply room for strlen (ifn) + 5 bytes in 'ofn'.  */
/*********************************************************************/

#ifdef __STDC__
static void overx_make_output_name (    /* Build Output File Name    */
    char *ofn,                          /* Output File Name (result) */
    char *ifn)                          /* Input File Name           */
#else
static void overx_make_output_name (    /* Build Output File Name    */
    ofn,                                /* Output File Name (result) */
    ifn)                                /* Input File Name           */
    char *ofn;                          /* Output File Name (result) */
    char *ifn;                          /* Input File Name           */
#endif
{                                       /* 'overx_make_output_name'  */
    char *bp;                           /* Base Name Pointer         */
    char *sp;                           /* Scan Pointer              */

    strcpy (ofn, ifn);

    /* Locate the start of the file name component */
    bp = ofn;
    for (sp = ofn; *sp; sp++) {
        if (*sp EQ '/')
            bp = sp + 1;
#ifdef OSMSDOS
        if ((*sp EQ '\\') OR (*sp EQ ':'))
            bp = sp + 1;
#endif
    }

    /* Cut at the first dot of the file name component */
    while ((*bp NE '.') AND (*bp NE '\0'))
        bp++;
    strcpy (bp, ".out");
}                                       /* 'overx_make_output_name'  */

/*********************************************************************/
/* Function: overx_scan_file()                                       */
/*                                                                   */
/* Reads 'ifn' in binary mode and writes a merged copy to the file   */
/* named by overx_make_output_name().  Each input line (terminated   */
/* by linefeed) is split at carriage returns into fragments which    */
/* are overlaid column by column into a single output line.          */
/*                                                                   */
/*   cf  TRUE replaces control chars (<= 0x1F) with ' ', else '.'    */
/*   df  TRUE replaces DEL (0x7F) with ' ', else '.'                 */
/*   gf  TRUE replaces chars with bit 0x80 set with ' ', else '.'    */
/*   vf  TRUE prints character counts and maxima after the summary   */
/*                                                                   */
/* A formfeed is replaced by a space in the line text and re-emitted */
/* as the first byte of an output line:  the same line when it was   */
/* the first byte of the input buffer, otherwise the following line. */
/*                                                                   */
/* Any failure is reported through overx_error_exit(), which         */
/* terminates the program.                                           */
/*********************************************************************/

#ifdef __STDC__
static void overx_scan_file (           /* Scan File                 */
    char *ifn,                          /* Input File Name           */
    boolean cf,                         /* Control To Space Flag     */
    boolean df,                         /* DEL To Space Flag         */
    boolean gf,                         /* Graphic To Space Flag     */
    boolean vf)                         /* Verbose Flag              */
#else
static void overx_scan_file (           /* Scan File                 */
    ifn,                                /* Input File Name           */
    cf,                                 /* Control To Space Flag     */
    df,                                 /* DEL To Space Flag         */
    gf,                                 /* Graphic To Space Flag     */
    vf)                                 /* Verbose Flag              */
    char *ifn;                          /* Input File Name           */
    boolean cf;                         /* Control Flag              */
    boolean df;                         /* DEL Flag                  */
    boolean gf;                         /* Graphic Flag              */
    boolean vf;                         /* Verbose Flag              */
#endif
{                                       /* 'overx_scan_file'         */
    boolean ff = FALSE;                 /* Current Formfeed Flag     */
    boolean pf = FALSE;                 /* Pending Formfeed Flag     */
    FILE *ifp;                          /* Input File Pointer        */
    char *ixp;                          /* Input Excess Pointer      */
    int ixl;                            /* Input Excess Length       */
    char *ibp;                          /* Input Buffer Pointer      */
    int ibl;                            /* Input Buffer Length       */
    int ibc;                            /* Input Buffer Count        */
    long icc;                           /* Input Control Count       */
    long idc;                           /* Input DEL Count           */
    long ifc;                           /* Input Formfeed Count      */
    long igc;                           /* Input Graphic Count       */
    long ilc;                           /* Input Line Count          */
    char *lfp;                          /* Line Fragment Pointer     */
    int lfl;                            /* Line Fragment Length      */
    int lfc;                            /* Line Fragment Count       */
    int mrl;                            /* Max Raw Length            */
    int mfl;                            /* Max Fragment Length       */
    int mfc;                            /* Max Fragment Count        */
    FILE *ofp;                          /* Output File Pointer       */
    char *ofn;                          /* Output File Name          */
    char ofs[OVERX_NAME_MAX + 8];       /* Output File String        */
    char *obp;                          /* Output Buffer Pointer     */
    int obl;                            /* Output Buffer Length      */
    int owl;                            /* Output Write Length       */
    char es[OVERX_MSG_MAX];             /* Error Message String      */

    /* Reject names that would overflow 'ofs' or 'es' */
    if (strlen (ifn) GT (size_t) OVERX_NAME_MAX)
        overx_error_exit ("Input filename too long");

    /* Construct output file name */
    ofn = &ofs[0];
    overx_make_output_name (ofn, ifn);
    if (strcmp (ifn, ofn) EQ 0) {
        (void) sprintf (es,
            "Input filename ('%s') EQ output filename", ifn);
        overx_error_exit (es);
    }

    /* Open input file (a fresh stream per file;  a stream pointer   */
    /* must not be reused once it has been closed)                   */
    ifp = fopen (ifn, "rb");
    if (NOT ifp) {
        (void) sprintf (es, "Error opening input file '%s'", ifn);
        overx_error_exit (es);
    }

    /* Open output file */
    ofp = fopen (ofn, "wb");
    if (NOT ofp) {
        (void) fclose (ifp);
        (void) sprintf (es, "Error opening output file '%s'", ofn);
        overx_error_exit (es);
    }

    /* Read and scan input file, one output line per cycle */
    ixp = (char *) 0;
    ixl = 0;
    mrl = mfl = mfc = 0;
    icc = idc = ifc = igc = ilc = 0;
    for (;;) {

        /* Fill input buffer with binary data (or to end-of-file),   */
        /* behind the excess carried over from the previous cycle    */
        ibp = &overx_ibuffer[0] + ixl;
        ibl = (int) fread (ibp, 1, (size_t) (OVERX_READ_MAX - ixl), ifp);
        if ((NOT ibl) AND (NOT ixl))
            break;
        ibl += ixl;

        /* Check for pending formfeed (end-of-page) */
        if (pf) {
            ff = TRUE;
            pf = FALSE;
        }

        /* Clear the fragment table */
        for (lfc = 0; lfc LT OVERX_FRAG_MAX; lfc++) {
            overx_frag[lfc].ptr = (char *) 0;
            overx_frag[lfc].len = 0;
        }

        /* Scan fragments and convert controls, graphics, and DELs */
        lfc = 0;
        lfp = ibp = &overx_ibuffer[0];
        for (ibc = 0; ibc LT ibl; ibc++, ibp++) {

            /* Save address of next fragment string */
            if (NOT overx_frag[lfc].ptr)
                overx_frag[lfc].ptr = lfp = ibp;

            /* Check for linefeed/newline (end-of-line) */
            if (*ibp EQ '\n') {
                lfl = (int) (ibp - lfp);
                overx_frag[lfc].len = lfl;
                if (lfl GT mfl)
                    mfl = lfl;
                lfc++;
                ilc++;
                break;
            }

            /* Check for carriage return (end-of-fragment) */
            if (*ibp EQ '\r') {
                lfl = (int) (ibp - lfp);
                overx_frag[lfc].len = lfl;
                if (lfl GT mfl)
                    mfl = lfl;
                lfc++;

                /* Check for too many fragments */
                if (lfc LT OVERX_FRAG_MAX)
                    continue;
                (void) fclose (ofp);
                (void) sprintf (es,
                    "Error too many fragments - line '%ld' in '%s'",
                    ilc + 1, ifn);
                overx_error_exit (es);
            }

            /* Check for formfeed (end-of-page) */
            if (*ibp EQ '\f') {

                /* Formfeed as FIRST character goes on this line,    */
                /* otherwise it is deferred to the next line         */
                if (ibp EQ &overx_ibuffer[0])
                    ff = TRUE;
                else
                    pf = TRUE;
                ifc++;
                *ibp = ' ';             /* Is this a good idea??     */
                continue;
            }

            /* Check for graphic character */
            if (*ibp & 0x80) {
                *ibp = (char) (gf ? ' ' : '.');
                igc++;
                continue;
            }

            /* Check for control character */
            if (*ibp LE 0x1F) {
                *ibp = (char) (cf ? ' ' : '.');
                icc++;
                continue;
            }

            /* Check for DEL character */
            if (*ibp EQ 0x7F) {
                *ibp = (char) (df ? ' ' : '.');
                idc++;
            }
        }

        /* Check for missing linefeed (no end-of-line found) */
        if (ibc GE ibl) {

            /* Close last line fragment, if one is open.  When the   */
            /* data ends right after a carriage return no fragment   */
            /* is open and 'lfp' still refers to the previous one.   */
            if (overx_frag[lfc].ptr) {
                lfl = (int) (ibp - lfp);
                overx_frag[lfc].len = lfl;
                if (lfl GT mfl)
                    mfl = lfl;
                lfc++;
            }
            ilc++;
            (void) printf (
                "Warning no linefeed - line '%ld' in '%s'\n",
                ilc, ifn);

            /* No terminator to skip over below */
            ibc--;
            ibp--;
        }

        /* Remember the input buffer excess (past the terminator) */
        ibc++;
        ibp++;
        ixp = ibp;
        ixl = ibl - ibc;

        /* Check for maximum raw length or fragments count */
        if (mrl LT ibc)
            mrl = ibc;
        if (mfc LT lfc)
            mfc = lfc;

        /* Initialize output buffer to all spaces */
        obl = 0;
        memset (&overx_obuffer[0], ' ', (size_t) OVERX_READ_MAX);

        /* Merge fragments to output buffer */
        for (lfc = 0; lfc LT OVERX_FRAG_MAX; lfc++) {

            /* Check for last fragment */
            lfp = overx_frag[lfc].ptr;
            if (NOT lfp)
                break;

            /* Check for empty fragment */
            lfl = overx_frag[lfc].len;
            if (NOT lfl)
                continue;

            /* Output line is as long as the longest fragment */
            if (obl LT lfl)
                obl = lfl;

            /* Merge this fragment, column by column */
            obp = &overx_obuffer[0];
            for (; lfl GT 0; lfl--, lfp++, obp++) {

                /* Any character replaces a previous space;          */
                /* any non-space replaces a previous underscore;     */
                /* every other previous character is kept            */
                if (*obp EQ ' ')
                    *obp = *lfp;
                else if ((*obp EQ '_') AND (*lfp NE ' '))
                    *obp = *lfp;
            }
        }

        /* Append (carriage return and) linefeed to output buffer */
        obp = &overx_obuffer[0] + obl;
#ifdef OSMSDOS
        *obp++ = '\r';
        obl++;
#endif
        *obp++ = '\n';
        obl++;

        /* Insert formfeed, if necessary, at front of output buffer */
        if (ff) {
            memmove (&overx_obuffer[1], &overx_obuffer[0], (size_t) obl);
            overx_obuffer[0] = '\f';
            obl++;
            ff = pf = FALSE;
        }

        /* Write line to output file */
        owl = (int) fwrite (&overx_obuffer[0], 1, (size_t) obl, ofp);
        if (owl NE obl) {
            (void) fclose (ofp);
            (void) sprintf (es,
                "Error writing output - line '%ld' file '%s'",
                ilc, ofn);
            overx_error_exit (es);
        }

        /* Move up remainder of input buffer for next line cycle;    */
        /* source and destination may overlap, so use memmove        */
        if (ixl)
            memmove (&overx_ibuffer[0], ixp, (size_t) ixl);
    }

    /* A short read ends the loop;  tell an error from end-of-file */
    if (ferror (ifp)) {
        (void) fclose (ifp);
        (void) fclose (ofp);
        (void) sprintf (es, "Error reading input file '%s'", ifn);
        overx_error_exit (es);
    }

    /* Close input file */
    (void) fclose (ifp);

    /* Close output file (buffered data is flushed here) */
    if (fclose (ofp) NE 0) {
        (void) sprintf (es, "Error closing output file '%s'", ofn);
        overx_error_exit (es);
    }

    /* Report on this file */
    (void) printf (
        "From='%s' To='%s' Pages=%ld Lines=%ld\n",
        ifn, ofn, (ifc + 1), ilc);

    /* Check for verbose report mode */
    if (NOT vf)
        return;
    (void) printf (
        "<Controls=%ld Formfeeds=%ld Graphics=%ld DELs=%ld>\n",
        icc, ifc, igc, idc);
    (void) printf (
        "<MaxRawLength=%d MaxFragLength=%d MaxFragCount=%d>\n",
        mrl, mfl, mfc);

    return;
}                                       /* 'overx_scan_file'         */

/*********************************************************************/
/* Function: overx_error_exit()                                      */
/*                                                                   */
/* Prints 'es' and an abnormal termination notice to standard        */
/* output, then terminates the program with exit status 1.           */
/* Does not return.                                                  */
/*********************************************************************/

#ifdef __STDC__
static void overx_error_exit (          /* Log Error and Exit        */
    char *es)                           /* Error Message String      */
#else
static void overx_error_exit (          /* Log Error and Exit        */
    es)                                 /* Error Message String      */
    char *es;                           /* Error Message String      */
#endif
{                                       /* 'overx_error_exit'        */
    (void) printf ("overx: %s\n", es);
    (void) printf ("overx: Abnormal termination\n");
    exit (1);
}                                       /* 'overx_error_exit'        */