src/third_party/icu/source/test/perf/ubrkperf/ubrkperfold.cpp - cobalt - Git at Google

 /********************************************************************
  * COPYRIGHT:
  * Copyright (C) 2001-2012 IBM, Inc.   All Rights Reserved.
  *
  ********************************************************************/
 /********************************************************************************
 *
 * File ubrkperf.cpp
 *
 * Modification History:
 *        Name                     Description
 *     Vladimir Weinstein          First Version, based on collperf
 *
 *********************************************************************************
 */

 //
 //  This program tests break iterator performance
 //      Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs
 //      (if any)
 //      A text file is required as input.  It must be in utf-8 or utf-16 format,
 //      and include a byte order mark.  Either LE or BE format is OK.
 //

 const char gUsageString[] =
  "usage:  ubrkperf options...\n"
     "-help                      Display this message.\n"
     "-file file_name            utf-16/utf-8 format file.\n"
     "-locale name               ICU locale to use.  Default is en_US\n"
     "-langid 0x1234             Windows Language ID number.  Default to value for -locale option\n"
     "                              see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
     "-win                       Run test using Windows native services. (currently not working) (ICU is default)\n"
     "-unix                      Run test using Unix word breaking services. (currently not working) \n"
     "-mac                       Run test using MacOSX word breaking services.\n"
     "-uselen                    Use API with string lengths.  Default is null-terminated strings\n"
     "-char                      Use character break iterator\n"
     "-word                      Use word break iterator\n"
     "-line                      Use line break iterator\n"
     "-sentence                  Use sentence break iterator\n"
     "-loop nnnn                 Loopcount for test.  Adjust for reasonable total running time.\n"
     "-iloop n                   Inner Loop Count.  Default = 1.  Number of calls to function\n"
     "                               under test at each call point.  For measuring test overhead.\n"
     "-terse                     Terse numbers-only output.  Intended for use by scripts.\n"
     "-dump                      Display stuff.\n"
     "-capi                      Use C APIs instead of C++ APIs (currently not working)\n"
     "-next                      Do the next test\n"
     "-isBound                   Do the isBound test\n"
     ;


 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <math.h>
 #include <locale.h>
 #include <errno.h>
 #include <sys/stat.h>

 #include <unicode/utypes.h>
 #include <unicode/ucol.h>
 #include <unicode/ucoleitr.h>
 #include <unicode/uloc.h>
 #include <unicode/ustring.h>
 #include <unicode/ures.h>
 #include <unicode/uchar.h>
 #include <unicode/ucnv.h>
 #include <unicode/utf8.h>

 #include <unicode/brkiter.h>


 #if U_PLATFORM_HAS_WIN32_API
 #include <windows.h>
 #else
 //
 //  Stubs for Windows API functions when building on UNIXes.
 //
 #include <sys/time.h>
 unsigned long timeGetTime() {
     struct timeval t;
     gettimeofday(&t, 0);
     unsigned long val = t.tv_sec * 1000;  // Let it overflow.  Who cares.
     val += t.tv_usec / 1000;
     return val;
 };
 #define MAKELCID(a,b) 0
 #endif


 //
 //  Command line option variables
 //     These global variables are set according to the options specified
 //     on the command line by the user.
 char * opt_fName      = 0;
 char * opt_locale     = "en_US";
 int    opt_langid     = 0;         // Defaults to value corresponding to opt_locale.
 char * opt_rules      = 0;
 UBool  opt_help       = FALSE;
 int    opt_time       = 0;
 int    opt_loopCount  = 0;
 int    opt_passesCount= 1;
 UBool  opt_terse      = FALSE;
 UBool  opt_icu        = TRUE;
 UBool  opt_win        = FALSE;      // Run with Windows native functions.
 UBool  opt_unix       = FALSE;      // Run with UNIX strcoll, strxfrm functions.
 UBool  opt_mac        = FALSE;      // Run with MacOSX word break services.
 UBool  opt_uselen     = FALSE;
 UBool  opt_dump       = FALSE;
 UBool  opt_char       = FALSE;
 UBool  opt_word       = FALSE;
 UBool  opt_line       = FALSE;
 UBool  opt_sentence   = FALSE;
 UBool  opt_capi       = FALSE;

 UBool  opt_next       = FALSE;
 UBool  opt_isBound    = FALSE;


 //
 //   Definitions for the command line options
 //
 struct OptSpec {
     const char *name;
     enum {FLAG, NUM, STRING} type;
     void *pVar;
 };

 OptSpec opts[] = {
     {"-file",        OptSpec::STRING, &opt_fName},
     {"-locale",      OptSpec::STRING, &opt_locale},
     {"-langid",      OptSpec::NUM,    &opt_langid},
     {"-win",         OptSpec::FLAG,   &opt_win},
     {"-unix",        OptSpec::FLAG,   &opt_unix},
     {"-mac",         OptSpec::FLAG,   &opt_mac},
     {"-uselen",      OptSpec::FLAG,   &opt_uselen},
     {"-loop",        OptSpec::NUM,    &opt_loopCount},
     {"-time",        OptSpec::NUM,    &opt_time},
     {"-passes",      OptSpec::NUM,    &opt_passesCount},
     {"-char",        OptSpec::FLAG,   &opt_char},
     {"-word",        OptSpec::FLAG,   &opt_word},
     {"-line",        OptSpec::FLAG,   &opt_line},
     {"-sentence",    OptSpec::FLAG,   &opt_sentence},
     {"-terse",       OptSpec::FLAG,   &opt_terse},
     {"-dump",        OptSpec::FLAG,   &opt_dump},
     {"-capi",        OptSpec::FLAG,   &opt_capi},
     {"-next",        OptSpec::FLAG,   &opt_next},
     {"-isBound",     OptSpec::FLAG,   &opt_isBound},
     {"-help",        OptSpec::FLAG,   &opt_help},
     {"-?",           OptSpec::FLAG,   &opt_help},
     {0, OptSpec::FLAG, 0}
 };


 //---------------------------------------------------------------------------
 //
 //  Global variables pointing to and describing the test file
 //
 //---------------------------------------------------------------------------

 //DWORD          gWinLCID;
 BreakIterator *brkit = NULL;
 UChar *text = NULL;
 int32_t textSize = 0;


 #if U_PLATFORM_IS_DARWIN_BASED
 #include <ApplicationServices/ApplicationServices.h>
 enum{
   kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask)
     };
 UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};
 TextBreakLocatorRef breakRef;
 UCTextBreakType macBreakType;

 void createMACBrkIt() {
   OSStatus status = noErr;
   LocaleRef lref;
   status = LocaleRefFromLocaleString(opt_locale, &lref);
   status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
   if(opt_char == TRUE) {
     macBreakType = kUCTextBreakClusterMask;
   } else if(opt_word == TRUE) {
     macBreakType = kUCTextBreakWordMask;
   } else if(opt_line == TRUE) {
     macBreakType = kUCTextBreakLineMask;
   } else if(opt_sentence == TRUE) {
     // error
     // brkit = BreakIterator::createSentenceInstance(opt_locale, status);
   } else {
     // default is character iterator
     macBreakType = kUCTextBreakClusterMask;
       }
 }
 #endif

 void createICUBrkIt() {
   //
   //  Set up an ICU break iterator
   //
   UErrorCode          status = U_ZERO_ERROR;
   if(opt_char == TRUE) {
     brkit = BreakIterator::createCharacterInstance(opt_locale, status);
   } else if(opt_word == TRUE) {
     brkit = BreakIterator::createWordInstance(opt_locale, status);
   } else if(opt_line == TRUE) {
     brkit = BreakIterator::createLineInstance(opt_locale, status);
   } else if(opt_sentence == TRUE) {
     brkit = BreakIterator::createSentenceInstance(opt_locale, status);
   } else {
     // default is character iterator
     brkit = BreakIterator::createCharacterInstance(opt_locale, status);
   }
   if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
     fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
   }
   if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
     fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);
   }

 }

 //---------------------------------------------------------------------------
 //
 //  ProcessOptions()    Function to read the command line options.
 //
 //---------------------------------------------------------------------------
 UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
 {
     int         i;
     int         argNum;
     const char  *pArgName;
     OptSpec    *pOpt;

     for (argNum=1; argNum<argc; argNum++) {
         pArgName = argv[argNum];
         for (pOpt = opts;  pOpt->name != 0; pOpt++) {
             if (strcmp(pOpt->name, pArgName) == 0) {
                 switch (pOpt->type) {
                 case OptSpec::FLAG:
                     *(UBool *)(pOpt->pVar) = TRUE;
                     break;
                 case OptSpec::STRING:
                     argNum ++;
                     if (argNum >= argc) {
                         fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
                         return FALSE;
                     }
                     *(const char **)(pOpt->pVar)  = argv[argNum];
                     break;
                 case OptSpec::NUM:
                     argNum ++;
                     if (argNum >= argc) {
                         fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
                         return FALSE;
                     }
                     char *endp;
                     i = strtol(argv[argNum], &endp, 0);
                     if (endp == argv[argNum]) {
                         fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
                         return FALSE;
                     }
                     *(int *)(pOpt->pVar) = i;
                 }
                 break;
             }
         }
         if (pOpt->name == 0)
         {
             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
             return FALSE;
         }
     }
 return TRUE;
 }


 void doForwardTest() {
   if (opt_terse == FALSE) {
     printf("Doing the forward test\n");
   }
   int32_t noBreaks = 0;
   int32_t i = 0;
   unsigned long startTime = timeGetTime();
   unsigned long elapsedTime = 0;
   if(opt_icu) {
     createICUBrkIt();
     brkit->setText(UnicodeString(text, textSize));
     brkit->first();
     if (opt_terse == FALSE) {
       printf("Warmup\n");
     }
     int j;
     while((j = brkit->next()) != BreakIterator::DONE) {
       noBreaks++;
       //fprintf(stderr, "%d ", j);
     }

     if (opt_terse == FALSE) {
       printf("Measure\n");
     }
     startTime = timeGetTime();
     for(i = 0; i < opt_loopCount; i++) {
       brkit->first();
       while(brkit->next() != BreakIterator::DONE) {
       }
     }

     elapsedTime = timeGetTime()-startTime;
   } else if(opt_mac) {
 #if U_PLATFORM_IS_DARWIN_BASED
     createMACBrkIt();
     UniChar* filePtr = text;
     OSStatus status = noErr;
     UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;
     startOffset = 0;
     //printf("\t---Search forward--\n");

     while (startOffset < numUniChars)
     {
 	status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
                                startOffset, &breakOffset);
       //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));
       //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset));

       // Output break
       //printf("\t%d\n", (int)breakOffset);

       // Increment counters
 	noBreaks++;
       startOffset = breakOffset;
     }
     startTime = timeGetTime();
     for(i = 0; i < opt_loopCount; i++) {
       startOffset = 0;

       while (startOffset < numUniChars)
 	{
 	  status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
 				   startOffset, &breakOffset);
 	  // Increment counters
 	  startOffset = breakOffset;
 	}
     }
     elapsedTime = timeGetTime()-startTime;
     UCDisposeTextBreakLocator(&breakRef);
 #endif


   }


   if (opt_terse == FALSE) {
   int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
       int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
       int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
       printf("forward break iteration average loop time %d\n", loopTime);
       printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
       printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
   } else {
       printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
   }


 }

 void doIsBoundTest() {
   int32_t noBreaks = 0, hit = 0;
   int32_t i = 0, j = 0;
   unsigned long startTime = timeGetTime();
   unsigned long elapsedTime = 0;
   createICUBrkIt();
   brkit->setText(UnicodeString(text, textSize));
   brkit->first();
   for(j = 0; j < textSize; j++) {
     if(brkit->isBoundary(j)) {
       noBreaks++;
       //fprintf(stderr, "%d ", j);
     }
   }
   /*
   while(brkit->next() != BreakIterator::DONE) {
     noBreaks++;
   }
   */

   startTime = timeGetTime();
   for(i = 0; i < opt_loopCount; i++) {
     for(j = 0; j < textSize; j++) {
       if(brkit->isBoundary(j)) {
         hit++;
       }
     }
   }

   elapsedTime = timeGetTime()-startTime;
   int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
   if (opt_terse == FALSE) {
       int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
       int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
       printf("forward break iteration average loop time %d\n", loopTime);
       printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
       printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
   } else {
       printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
   }
 }

 //----------------------------------------------------------------------------------------
 //
 //   UnixConvert   -- Convert the lines of the file to the encoding for UNIX
 //                    Since it appears that Unicode support is going in the general
 //                    direction of the use of UTF-8 locales, that is the approach
 //                    that is used here.
 //
 //----------------------------------------------------------------------------------------
 void  UnixConvert() {
 #if 0
     int    line;

     UConverter   *cvrtr;    // An ICU code page converter.
     UErrorCode    status = U_ZERO_ERROR;


     cvrtr = ucnv_open("utf-8", &status);    // we are just doing UTF-8 locales for now.
     if (U_FAILURE(status)) {
         fprintf(stderr, "ICU Converter open failed.: %d\n", &status);
         exit(-1);
     }
     // redo for unix
     for (line=0; line < gNumFileLines; line++) {
         int sizeNeeded = ucnv_fromUChars(cvrtr,
                                          0,            // ptr to target buffer.
                                          0,            // length of target buffer.
                                          gFileLines[line].name,
                                          -1,           //  source is null terminated
                                          &status);
         if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {
             fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
             exit(-1);
         }
         status = U_ZERO_ERROR;
         gFileLines[line].unixName = new char[sizeNeeded+1];
         sizeNeeded = ucnv_fromUChars(cvrtr,
                                          gFileLines[line].unixName, // ptr to target buffer.
                                          sizeNeeded+1, // length of target buffer.
                                          gFileLines[line].name,
                                          -1,           //  source is null terminated
                                          &status);
         if (U_FAILURE(status)) {
             fprintf(stderr, "ICU Conversion Failed.: %d\n", status);
             exit(-1);
         }
         gFileLines[line].unixName[sizeNeeded] = 0;
     };
     ucnv_close(cvrtr);
 #endif
 }


 //----------------------------------------------------------------------------------------
 //
 //  class UCharFile   Class to hide all the gorp to read a file in
 //                    and produce a stream of UChars.
 //
 //----------------------------------------------------------------------------------------
 class UCharFile {
 public:
     UCharFile(const char *fileName);
     ~UCharFile();
     UChar   get();
     UBool   eof() {return fEof;};
     UBool   error() {return fError;};
     int32_t size() { return fFileSize; };

 private:
     UCharFile (const UCharFile &other) {};                         // No copy constructor.
     UCharFile & operator = (const UCharFile &other) {return *this;};   // No assignment op

     FILE         *fFile;
     const char   *fName;
     UBool        fEof;
     UBool        fError;
     UChar        fPending2ndSurrogate;
     int32_t      fFileSize;

     enum {UTF16LE, UTF16BE, UTF8} fEncoding;
 };

 UCharFile::UCharFile(const char * fileName) {
     fEof                 = FALSE;
     fError               = FALSE;
     fName                = fileName;
     struct stat buf;
     int32_t result = stat(fileName, &buf);
     if(result != 0) {
       fprintf(stderr, "Error getting info\n");
       fFileSize = -1;
     } else {
       fFileSize = buf.st_size;
     }
     fFile                = fopen(fName, "rb");
     fPending2ndSurrogate = 0;
     if (fFile == NULL) {
         fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);
         fError = TRUE;
         return;
     }
     //
     //  Look for the byte order mark at the start of the file.
     //
     int BOMC1, BOMC2, BOMC3;
     BOMC1 = fgetc(fFile);
     BOMC2 = fgetc(fFile);

     if (BOMC1 == 0xff && BOMC2 == 0xfe) {
         fEncoding = UTF16LE; }
     else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
         fEncoding = UTF16BE; }
     else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
         fEncoding = UTF8; }
     else
     {
         fprintf(stderr, "collperf:  file \"%s\" encoding must be UTF-8 or UTF-16, and "
             "must include a BOM.\n", fileName);
         fError = true;
         return;
     }
 }


 UCharFile::~UCharFile() {
     fclose(fFile);
 }


 UChar UCharFile::get() {
     UChar   c;
     switch (fEncoding) {
     case UTF16LE:
         {
             int  cL, cH;
             cL = fgetc(fFile);
             cH = fgetc(fFile);
             c  = cL  | (cH << 8);
             if (cH == EOF) {
                 c   = 0;
                 fEof = TRUE;
             }
             break;
         }
     case UTF16BE:
         {
             int  cL, cH;
             cH = fgetc(fFile);
             cL = fgetc(fFile);
             c  = cL  | (cH << 8);
             if (cL == EOF) {
                 c   = 0;
                 fEof = TRUE;
             }
             break;
         }
     case UTF8:
         {
             if (fPending2ndSurrogate != 0) {
                 c = fPending2ndSurrogate;
                 fPending2ndSurrogate = 0;
                 break;
             }

             int ch = fgetc(fFile);   // Note:  c and ch are separate cause eof test doesn't work on UChar type.
             if (ch == EOF) {
                 c = 0;
                 fEof = TRUE;
                 break;
             }

             if (ch <= 0x7f) {
                 // It's ascii.  No further utf-8 conversion.
                 c = ch;
                 break;
             }

             // Figure out the lenght of the char and read the rest of the bytes
             //   into a temp array.
             int nBytes;
             if (ch >= 0xF0) {nBytes=4;}
             else if (ch >= 0xE0) {nBytes=3;}
             else if (ch >= 0xC0) {nBytes=2;}
             else {
                 fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile));
                 fError = TRUE;
                 return 0;
             }

             unsigned char  bytes[10];
             bytes[0] = (unsigned char)ch;
             int i;
             for (i=1; i<nBytes; i++) {
                 bytes[i] = fgetc(fFile);
                 if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {
                     fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch);
                     fError = TRUE;
                     return 0;
                 }
             }

             // Convert the bytes from the temp array to a Unicode char.
             i = 0;
             uint32_t  cp;
             U8_NEXT_UNSAFE(bytes, i, cp);
             c = (UChar)cp;

             if (cp >= 0x10000) {
                 // The code point needs to be broken up into a utf-16 surrogate pair.
                 //  Process first half this time through the main loop, and
                 //   remember the other half for the next time through.
                 UChar utf16Buf[3];
                 i = 0;
                 UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
                 fPending2ndSurrogate = utf16Buf[1];
                 c = utf16Buf[0];
             }
             break;
         };
     }
     return c;
 }


 //----------------------------------------------------------------------------------------
 //
 //    Main   --  process command line, read in and pre-process the test file,
 //                 call other functions to do the actual tests.
 //
 //----------------------------------------------------------------------------------------
 int main(int argc, const char** argv) {
     if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {
         printf(gUsageString);
         exit (1);
     }
     // Make sure that we've only got one API selected.
     if (opt_mac || opt_unix || opt_win) opt_icu = FALSE;
     if (opt_mac || opt_unix) opt_win = FALSE;
     if (opt_mac) opt_unix = FALSE;

     UErrorCode          status = U_ZERO_ERROR;


     //
     //  Set up a Windows LCID
     //
   /*
     if (opt_langid != 0) {
         gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
     }
     else {
         gWinLCID = uloc_getLCID(opt_locale);
     }
   */

     //
     //  Set the UNIX locale
     //
     if (opt_unix) {
         if (setlocale(LC_ALL, opt_locale) == 0) {
             fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
             exit(-1);
         }
     }

     // Read in  the input file.
     //   File assumed to be utf-16.
     //   Lines go onto heap buffers.  Global index array to line starts is created.
     //   Lines themselves are null terminated.
     //

     UCharFile f(opt_fName);
     if (f.error()) {
         exit(-1);
     }
     int32_t fileSize = f.size();
     const int STARTSIZE = 70000;
     int32_t bufSize = 0;
     int32_t charCount = 0;
     if(fileSize != -1) {
       text = (UChar *)malloc(fileSize*sizeof(UChar));
       bufSize = fileSize;
     } else {
       text = (UChar *)malloc(STARTSIZE*sizeof(UChar));
       bufSize = STARTSIZE;
     }
     if(text == NULL) {
       fprintf(stderr, "Allocating buffer failed\n");
       exit(-1);
     }


     //  Read the file, split into lines, and save in memory.
     //  Loop runs once per utf-16 value from the input file,
     //    (The number of bytes read from file per loop iteration depends on external encoding.)
     for (;;) {

         UChar c = f.get();
         if(f.eof()) {
           break;
         }
         if (f.error()){
           exit(-1);
         }
         // We now have a good UTF-16 value in c.
         text[charCount++] = c;
         if(charCount == bufSize) {
           text = (UChar *)realloc(text, 2*bufSize*sizeof(UChar));
           if(text == NULL) {
             fprintf(stderr, "Reallocating buffer failed\n");
             exit(-1);
           }
           bufSize *= 2;
         }
     }


     if (opt_terse == FALSE) {
         printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);
     }

     textSize = charCount;


     //
     //  Dump file contents if requested.
     //
     if (opt_dump) {
       // dump file, etc... possibly
     }


     //
     //  We've got the file read into memory.  Go do something with it.
     //
     int32_t i = 0;
     for(i = 0; i < opt_passesCount; i++) {
       if(opt_loopCount != 0) {
         if(opt_next) {
           doForwardTest();
         } else if(opt_isBound) {
           doIsBoundTest();
         } else {
           doForwardTest();
         }
       } else if(opt_time != 0) {

       }
     }

   if(text != NULL) {
     free(text);
   }
     if(brkit != NULL) {
       delete brkit;
     }

     return 0;
 }
	/********************************************************************
	* COPYRIGHT:
	* Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved.
	*
	********************************************************************/
	/********************************************************************************
	*
	* File ubrkperf.cpp
	*
	* Modification History:
	* Name Description
	* Vladimir Weinstein First Version, based on collperf
	*
	*********************************************************************************
	*/

	//
	// This program tests break iterator performance
	// Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs
	// (if any)
	// A text file is required as input. It must be in utf-8 or utf-16 format,
	// and include a byte order mark. Either LE or BE format is OK.
	//

	const char gUsageString[] =
	"usage: ubrkperf options...\n"
	"-help Display this message.\n"
	"-file file_name utf-16/utf-8 format file.\n"
	"-locale name ICU locale to use. Default is en_US\n"
	"-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
	" see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
	"-win Run test using Windows native services. (currently not working) (ICU is default)\n"
	"-unix Run test using Unix word breaking services. (currently not working) \n"
	"-mac Run test using MacOSX word breaking services.\n"
	"-uselen Use API with string lengths. Default is null-terminated strings\n"
	"-char Use character break iterator\n"
	"-word Use word break iterator\n"
	"-line Use line break iterator\n"
	"-sentence Use sentence break iterator\n"
	"-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
	"-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"
	" under test at each call point. For measuring test overhead.\n"
	"-terse Terse numbers-only output. Intended for use by scripts.\n"
	"-dump Display stuff.\n"
	"-capi Use C APIs instead of C++ APIs (currently not working)\n"
	"-next Do the next test\n"
	"-isBound Do the isBound test\n"
	;


	#include <stdio.h>
	#include <string.h>
	#include <stdlib.h>
	#include <math.h>
	#include <locale.h>
	#include <errno.h>
	#include <sys/stat.h>

	#include <unicode/utypes.h>
	#include <unicode/ucol.h>
	#include <unicode/ucoleitr.h>
	#include <unicode/uloc.h>
	#include <unicode/ustring.h>
	#include <unicode/ures.h>
	#include <unicode/uchar.h>
	#include <unicode/ucnv.h>
	#include <unicode/utf8.h>

	#include <unicode/brkiter.h>


	#if U_PLATFORM_HAS_WIN32_API
	#include <windows.h>
	#else
	//
	// Stubs for Windows API functions when building on UNIXes.
	//
	#include <sys/time.h>
	unsigned long timeGetTime() {
	struct timeval t;
	gettimeofday(&t, 0);
	unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares.
	val += t.tv_usec / 1000;
	return val;
	};
	#define MAKELCID(a,b) 0
	#endif


	//
	// Command line option variables
	// These global variables are set according to the options specified
	// on the command line by the user.
	char * opt_fName = 0;
	char * opt_locale = "en_US";
	int opt_langid = 0; // Defaults to value corresponding to opt_locale.
	char * opt_rules = 0;
	UBool opt_help = FALSE;
	int opt_time = 0;
	int opt_loopCount = 0;
	int opt_passesCount= 1;
	UBool opt_terse = FALSE;
	UBool opt_icu = TRUE;
	UBool opt_win = FALSE; // Run with Windows native functions.
	UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.
	UBool opt_mac = FALSE; // Run with MacOSX word break services.
	UBool opt_uselen = FALSE;
	UBool opt_dump = FALSE;
	UBool opt_char = FALSE;
	UBool opt_word = FALSE;
	UBool opt_line = FALSE;
	UBool opt_sentence = FALSE;
	UBool opt_capi = FALSE;

	UBool opt_next = FALSE;
	UBool opt_isBound = FALSE;



	//
	// Definitions for the command line options
	//
	struct OptSpec {
	const char *name;
	enum {FLAG, NUM, STRING} type;
	void *pVar;
	};

	OptSpec opts[] = {
	{"-file", OptSpec::STRING, &opt_fName},
	{"-locale", OptSpec::STRING, &opt_locale},
	{"-langid", OptSpec::NUM, &opt_langid},
	{"-win", OptSpec::FLAG, &opt_win},
	{"-unix", OptSpec::FLAG, &opt_unix},
	{"-mac", OptSpec::FLAG, &opt_mac},
	{"-uselen", OptSpec::FLAG, &opt_uselen},
	{"-loop", OptSpec::NUM, &opt_loopCount},
	{"-time", OptSpec::NUM, &opt_time},
	{"-passes", OptSpec::NUM, &opt_passesCount},
	{"-char", OptSpec::FLAG, &opt_char},
	{"-word", OptSpec::FLAG, &opt_word},
	{"-line", OptSpec::FLAG, &opt_line},
	{"-sentence", OptSpec::FLAG, &opt_sentence},
	{"-terse", OptSpec::FLAG, &opt_terse},
	{"-dump", OptSpec::FLAG, &opt_dump},
	{"-capi", OptSpec::FLAG, &opt_capi},
	{"-next", OptSpec::FLAG, &opt_next},
	{"-isBound", OptSpec::FLAG, &opt_isBound},
	{"-help", OptSpec::FLAG, &opt_help},
	{"-?", OptSpec::FLAG, &opt_help},
	{0, OptSpec::FLAG, 0}
	};


	//---------------------------------------------------------------------------
	//
	// Global variables pointing to and describing the test file
	//
	//---------------------------------------------------------------------------

	//DWORD gWinLCID;
	BreakIterator *brkit = NULL;
	UChar *text = NULL;
	int32_t textSize = 0;



	#if U_PLATFORM_IS_DARWIN_BASED
	#include <ApplicationServices/ApplicationServices.h>
	enum{
	kUCTextBreakAllMask = (kUCTextBreakClusterMask \| kUCTextBreakWordMask \| kUCTextBreakLineMask)
	};
	UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};
	TextBreakLocatorRef breakRef;
	UCTextBreakType macBreakType;

	void createMACBrkIt() {
	OSStatus status = noErr;
	LocaleRef lref;
	status = LocaleRefFromLocaleString(opt_locale, &lref);
	status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
	if(opt_char == TRUE) {
	macBreakType = kUCTextBreakClusterMask;
	} else if(opt_word == TRUE) {
	macBreakType = kUCTextBreakWordMask;
	} else if(opt_line == TRUE) {
	macBreakType = kUCTextBreakLineMask;
	} else if(opt_sentence == TRUE) {
	// error
	// brkit = BreakIterator::createSentenceInstance(opt_locale, status);
	} else {
	// default is character iterator
	macBreakType = kUCTextBreakClusterMask;
	}
	}
	#endif

	void createICUBrkIt() {
	//
	// Set up an ICU break iterator
	//
	UErrorCode status = U_ZERO_ERROR;
	if(opt_char == TRUE) {
	brkit = BreakIterator::createCharacterInstance(opt_locale, status);
	} else if(opt_word == TRUE) {
	brkit = BreakIterator::createWordInstance(opt_locale, status);
	} else if(opt_line == TRUE) {
	brkit = BreakIterator::createLineInstance(opt_locale, status);
	} else if(opt_sentence == TRUE) {
	brkit = BreakIterator::createSentenceInstance(opt_locale, status);
	} else {
	// default is character iterator
	brkit = BreakIterator::createCharacterInstance(opt_locale, status);
	}
	if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
	fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
	}
	if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
	fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);
	}

	}

	//---------------------------------------------------------------------------
	//
	// ProcessOptions() Function to read the command line options.
	//
	//---------------------------------------------------------------------------
	UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
	{
	int i;
	int argNum;
	const char *pArgName;
	OptSpec *pOpt;

	for (argNum=1; argNum<argc; argNum++) {
	pArgName = argv[argNum];
	for (pOpt = opts; pOpt->name != 0; pOpt++) {
	if (strcmp(pOpt->name, pArgName) == 0) {
	switch (pOpt->type) {
	case OptSpec::FLAG:
	(UBool )(pOpt->pVar) = TRUE;
	break;
	case OptSpec::STRING:
	argNum ++;
	if (argNum >= argc) {
	fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
	return FALSE;
	}
	(const char *)(pOpt->pVar) = argv[argNum];
	break;
	case OptSpec::NUM:
	argNum ++;
	if (argNum >= argc) {
	fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
	return FALSE;
	}
	char *endp;
	i = strtol(argv[argNum], &endp, 0);
	if (endp == argv[argNum]) {
	fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
	return FALSE;
	}
	(int )(pOpt->pVar) = i;
	}
	break;
	}
	}
	if (pOpt->name == 0)
	{
	fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
	return FALSE;
	}
	}
	return TRUE;
	}


	void doForwardTest() {
	if (opt_terse == FALSE) {
	printf("Doing the forward test\n");
	}
	int32_t noBreaks = 0;
	int32_t i = 0;
	unsigned long startTime = timeGetTime();
	unsigned long elapsedTime = 0;
	if(opt_icu) {
	createICUBrkIt();
	brkit->setText(UnicodeString(text, textSize));
	brkit->first();
	if (opt_terse == FALSE) {
	printf("Warmup\n");
	}
	int j;
	while((j = brkit->next()) != BreakIterator::DONE) {
	noBreaks++;
	//fprintf(stderr, "%d ", j);
	}

	if (opt_terse == FALSE) {
	printf("Measure\n");
	}
	startTime = timeGetTime();
	for(i = 0; i < opt_loopCount; i++) {
	brkit->first();
	while(brkit->next() != BreakIterator::DONE) {
	}
	}

	elapsedTime = timeGetTime()-startTime;
	} else if(opt_mac) {
	#if U_PLATFORM_IS_DARWIN_BASED
	createMACBrkIt();
	UniChar* filePtr = text;
	OSStatus status = noErr;
	UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;
	startOffset = 0;
	//printf("\t---Search forward--\n");

	while (startOffset < numUniChars)
	{
	status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
	startOffset, &breakOffset);
	//require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));
	//require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset));

	// Output break
	//printf("\t%d\n", (int)breakOffset);

	// Increment counters
	noBreaks++;
	startOffset = breakOffset;
	}
	startTime = timeGetTime();
	for(i = 0; i < opt_loopCount; i++) {
	startOffset = 0;

	while (startOffset < numUniChars)
	{
	status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
	startOffset, &breakOffset);
	// Increment counters
	startOffset = breakOffset;
	}
	}
	elapsedTime = timeGetTime()-startTime;
	UCDisposeTextBreakLocator(&breakRef);
	#endif


	}


	if (opt_terse == FALSE) {
	int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
	int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
	int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
	printf("forward break iteration average loop time %d\n", loopTime);
	printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
	printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
	} else {
	printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
	}


	}

	void doIsBoundTest() {
	int32_t noBreaks = 0, hit = 0;
	int32_t i = 0, j = 0;
	unsigned long startTime = timeGetTime();
	unsigned long elapsedTime = 0;
	createICUBrkIt();
	brkit->setText(UnicodeString(text, textSize));
	brkit->first();
	for(j = 0; j < textSize; j++) {
	if(brkit->isBoundary(j)) {
	noBreaks++;
	//fprintf(stderr, "%d ", j);
	}
	}
	/*
	while(brkit->next() != BreakIterator::DONE) {
	noBreaks++;
	}
	*/

	startTime = timeGetTime();
	for(i = 0; i < opt_loopCount; i++) {
	for(j = 0; j < textSize; j++) {
	if(brkit->isBoundary(j)) {
	hit++;
	}
	}
	}

	elapsedTime = timeGetTime()-startTime;
	int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
	if (opt_terse == FALSE) {
	int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
	int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
	printf("forward break iteration average loop time %d\n", loopTime);
	printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
	printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
	} else {
	printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
	}
	}

	//----------------------------------------------------------------------------------------
	//
	// UnixConvert -- Convert the lines of the file to the encoding for UNIX
	// Since it appears that Unicode support is going in the general
	// direction of the use of UTF-8 locales, that is the approach
	// that is used here.
	//
	//----------------------------------------------------------------------------------------
	void UnixConvert() {
	#if 0
	int line;

	UConverter *cvrtr; // An ICU code page converter.
	UErrorCode status = U_ZERO_ERROR;


	cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now.
	if (U_FAILURE(status)) {
	fprintf(stderr, "ICU Converter open failed.: %d\n", &status);
	exit(-1);
	}
	// redo for unix
	for (line=0; line < gNumFileLines; line++) {
	int sizeNeeded = ucnv_fromUChars(cvrtr,
	0, // ptr to target buffer.
	0, // length of target buffer.
	gFileLines[line].name,
	-1, // source is null terminated
	&status);
	if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {
	fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
	exit(-1);
	}
	status = U_ZERO_ERROR;
	gFileLines[line].unixName = new char[sizeNeeded+1];
	sizeNeeded = ucnv_fromUChars(cvrtr,
	gFileLines[line].unixName, // ptr to target buffer.
	sizeNeeded+1, // length of target buffer.
	gFileLines[line].name,
	-1, // source is null terminated
	&status);
	if (U_FAILURE(status)) {
	fprintf(stderr, "ICU Conversion Failed.: %d\n", status);
	exit(-1);
	}
	gFileLines[line].unixName[sizeNeeded] = 0;
	};
	ucnv_close(cvrtr);
	#endif
	}


	//----------------------------------------------------------------------------------------
	//
	// class UCharFile Class to hide all the gorp to read a file in
	// and produce a stream of UChars.
	//
	//----------------------------------------------------------------------------------------
	class UCharFile {
	public:
	UCharFile(const char *fileName);
	~UCharFile();
	UChar get();
	UBool eof() {return fEof;};
	UBool error() {return fError;};
	int32_t size() { return fFileSize; };

	private:
	UCharFile (const UCharFile &other) {}; // No copy constructor.
	UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op

	FILE *fFile;
	const char *fName;
	UBool fEof;
	UBool fError;
	UChar fPending2ndSurrogate;
	int32_t fFileSize;

	enum {UTF16LE, UTF16BE, UTF8} fEncoding;
	};

	UCharFile::UCharFile(const char * fileName) {
	fEof = FALSE;
	fError = FALSE;
	fName = fileName;
	struct stat buf;
	int32_t result = stat(fileName, &buf);
	if(result != 0) {
	fprintf(stderr, "Error getting info\n");
	fFileSize = -1;
	} else {
	fFileSize = buf.st_size;
	}
	fFile = fopen(fName, "rb");
	fPending2ndSurrogate = 0;
	if (fFile == NULL) {
	fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);
	fError = TRUE;
	return;
	}
	//
	// Look for the byte order mark at the start of the file.
	//
	int BOMC1, BOMC2, BOMC3;
	BOMC1 = fgetc(fFile);
	BOMC2 = fgetc(fFile);

	if (BOMC1 == 0xff && BOMC2 == 0xfe) {
	fEncoding = UTF16LE; }
	else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
	fEncoding = UTF16BE; }
	else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
	fEncoding = UTF8; }
	else
	{
	fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and "
	"must include a BOM.\n", fileName);
	fError = true;
	return;
	}
	}


	UCharFile::~UCharFile() {
	fclose(fFile);
	}



	UChar UCharFile::get() {
	UChar c;
	switch (fEncoding) {
	case UTF16LE:
	{
	int cL, cH;
	cL = fgetc(fFile);
	cH = fgetc(fFile);
	c = cL \| (cH << 8);
	if (cH == EOF) {
	c = 0;
	fEof = TRUE;
	}
	break;
	}
	case UTF16BE:
	{
	int cL, cH;
	cH = fgetc(fFile);
	cL = fgetc(fFile);
	c = cL \| (cH << 8);
	if (cL == EOF) {
	c = 0;
	fEof = TRUE;
	}
	break;
	}
	case UTF8:
	{
	if (fPending2ndSurrogate != 0) {
	c = fPending2ndSurrogate;
	fPending2ndSurrogate = 0;
	break;
	}

	int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type.
	if (ch == EOF) {
	c = 0;
	fEof = TRUE;
	break;
	}

	if (ch <= 0x7f) {
	// It's ascii. No further utf-8 conversion.
	c = ch;
	break;
	}

	// Figure out the lenght of the char and read the rest of the bytes
	// into a temp array.
	int nBytes;
	if (ch >= 0xF0) {nBytes=4;}
	else if (ch >= 0xE0) {nBytes=3;}
	else if (ch >= 0xC0) {nBytes=2;}
	else {
	fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile));
	fError = TRUE;
	return 0;
	}

	unsigned char bytes[10];
	bytes[0] = (unsigned char)ch;
	int i;
	for (i=1; i<nBytes; i++) {
	bytes[i] = fgetc(fFile);
	if (bytes[i] < 0x80 \|\| bytes[i] >= 0xc0) {
	fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch);
	fError = TRUE;
	return 0;
	}
	}

	// Convert the bytes from the temp array to a Unicode char.
	i = 0;
	uint32_t cp;
	U8_NEXT_UNSAFE(bytes, i, cp);
	c = (UChar)cp;

	if (cp >= 0x10000) {
	// The code point needs to be broken up into a utf-16 surrogate pair.
	// Process first half this time through the main loop, and
	// remember the other half for the next time through.
	UChar utf16Buf[3];
	i = 0;
	UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
	fPending2ndSurrogate = utf16Buf[1];
	c = utf16Buf[0];
	}
	break;
	};
	}
	return c;
	}


	//----------------------------------------------------------------------------------------
	//
	// Main -- process command line, read in and pre-process the test file,
	// call other functions to do the actual tests.
	//
	//----------------------------------------------------------------------------------------
	int main(int argc, const char** argv) {
	if (ProcessOptions(argc, argv, opts) != TRUE \|\| opt_help \|\| opt_fName == 0) {
	printf(gUsageString);
	exit (1);
	}
	// Make sure that we've only got one API selected.
	if (opt_mac \|\| opt_unix \|\| opt_win) opt_icu = FALSE;
	if (opt_mac \|\| opt_unix) opt_win = FALSE;
	if (opt_mac) opt_unix = FALSE;

	UErrorCode status = U_ZERO_ERROR;



	//
	// Set up a Windows LCID
	//
	/*
	if (opt_langid != 0) {
	gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
	}
	else {
	gWinLCID = uloc_getLCID(opt_locale);
	}
	*/

	//
	// Set the UNIX locale
	//
	if (opt_unix) {
	if (setlocale(LC_ALL, opt_locale) == 0) {
	fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
	exit(-1);
	}
	}

	// Read in the input file.
	// File assumed to be utf-16.
	// Lines go onto heap buffers. Global index array to line starts is created.
	// Lines themselves are null terminated.
	//

	UCharFile f(opt_fName);
	if (f.error()) {
	exit(-1);
	}
	int32_t fileSize = f.size();
	const int STARTSIZE = 70000;
	int32_t bufSize = 0;
	int32_t charCount = 0;
	if(fileSize != -1) {
	text = (UChar )malloc(fileSizesizeof(UChar));
	bufSize = fileSize;
	} else {
	text = (UChar )malloc(STARTSIZEsizeof(UChar));
	bufSize = STARTSIZE;
	}
	if(text == NULL) {
	fprintf(stderr, "Allocating buffer failed\n");
	exit(-1);
	}


	// Read the file, split into lines, and save in memory.
	// Loop runs once per utf-16 value from the input file,
	// (The number of bytes read from file per loop iteration depends on external encoding.)
	for (;;) {

	UChar c = f.get();
	if(f.eof()) {
	break;
	}
	if (f.error()){
	exit(-1);
	}
	// We now have a good UTF-16 value in c.
	text[charCount++] = c;
	if(charCount == bufSize) {
	text = (UChar )realloc(text, 2bufSize*sizeof(UChar));
	if(text == NULL) {
	fprintf(stderr, "Reallocating buffer failed\n");
	exit(-1);
	}
	bufSize *= 2;
	}
	}


	if (opt_terse == FALSE) {
	printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);
	}

	textSize = charCount;




	//
	// Dump file contents if requested.
	//
	if (opt_dump) {
	// dump file, etc... possibly
	}


	//
	// We've got the file read into memory. Go do something with it.
	//
	int32_t i = 0;
	for(i = 0; i < opt_passesCount; i++) {
	if(opt_loopCount != 0) {
	if(opt_next) {
	doForwardTest();
	} else if(opt_isBound) {
	doIsBoundTest();
	} else {
	doForwardTest();
	}
	} else if(opt_time != 0) {

	}
	}

	if(text != NULL) {
	free(text);
	}
	if(brkit != NULL) {
	delete brkit;
	}

	return 0;
	}