/* third_party/web_platform_tests/resources/webidl2/test/widlproc/src/lex.c - cobalt - Git at Google */

 /***********************************************************************
  * $Id$
  * Copyright 2009 Aplix Corporation. All rights reserved.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *     http://www.apache.org/licenses/LICENSE-2.0
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  ***********************************************************************/
 #include <assert.h>
 #include <errno.h>
 #include <stdio.h>
 #include <string.h>
 #include "lex.h"
 #include "misc.h"
 #include "node.h"
 #include "process.h"

 /* One input file held entirely in memory. The files are chained into a
  * singly linked list in the order readinput was given them. */
 struct file {
     struct file *next;      /* following input file, or 0 for the last */
     const char *filename;   /* name reported with tokens and errors */
     char *buf;              /* file contents followed by a 0 byte */
     const char *pos;        /* where the lexer reads next */
     const char *end;        /* address of the 0 byte that follows the contents */
     unsigned int linenum;   /* line number at pos, starting from 1 */
 };

 /* Keyword table built from the KEYWORDS macro. lexidentifier walks it as
  * consecutive 0-terminated strings ended by an empty string. */
 const char keywords[] = KEYWORDS;

 /* Head of the list of input files. */
 static struct file *firstfile;
 /* File currently being lexed; 0 once every file has been consumed. */
 static struct file *file;
 /* The single token object that the lex functions fill in and return. */
 static struct tok tok;

 /***********************************************************************
  * readinput : read all input files into memory
  *
  * Enter:   argv = 0-terminated array of filenames; "-" selects stdin
  *
  * Each file is read whole into a buffer that ends with a 0 byte, and a
  * struct file for it is appended to the list of inputs. A file that
  * cannot be opened or read is reported through errorexit.
  */
 void
 readinput(const char *const *argv)
 {
     struct file **link = &file;
     const char *filename;
     while ((filename = *argv++) != 0) {
         struct file *entry;
         char *buf = 0;
         /* Byte counts are size_t, matching fread, so that doubling the
          * chunk size cannot overflow an int on very large inputs. */
         size_t len = 0;
         FILE *handle;
         int isstdin = !strcmp(filename, "-");
         /* Read the file. */
         if (isstdin) {
             handle = stdin;
             filename = "<stdin>";
         } else {
             handle = fopen(filename, "rb");
             if (!handle)
                 errorexit("%s: %s", filename, strerror(errno));
         }
         for (;;) {
             size_t want = len ? len * 2 : 4096;
             size_t got;
             /* One spare byte is kept for the terminating 0. */
             buf = memrealloc(buf, len + want + 1);
             got = fread(buf + len, 1, want, handle);
             if (!got)
                 break;
             len += got;
         }
         if (ferror(handle))
             errorexit("%s: I/O error", filename);
         if (!isstdin)
             fclose(handle);
         buf[len] = 0;
         /* Give back the unused tail of the buffer. */
         buf = memrealloc(buf, len + 1);
         /* Create the file struct for it and hook it onto the list. */
         entry = memalloc(sizeof *entry);
         *link = entry;
         link = &entry->next;
         entry->filename = filename;
         entry->pos = entry->buf = buf;
         entry->end = buf + len;
         entry->linenum = 1;
     }
     *link = 0;
     firstfile = file;
 }

 /***********************************************************************
  * lexerrorexit : report an error at the lexer's current position
  *
  * Enter:   format = message format string
  *          ... = arguments that go with format
  *
  * The filename and line number of the file being lexed are handed to
  * vlocerrorexit together with the format and its arguments.
  */
 static void
 lexerrorexit(const char *format, ...)
 {
     va_list args;
     va_start(args, format);
     vlocerrorexit(file->filename, file->linenum, format, args);
     va_end(args);
 }

 /***********************************************************************
  * lexblockcomment : lex a block comment
  *
  * Enter:   start = start of comment (its opening slash)
  *
  * Return:  tok struct, lifetime until next call to lex
  *
  * The token covers the text between the two delimiters and carries the
  * line number on which the comment opens. Newlines inside the comment
  * advance the file's line count.
  */
 static struct tok *
 lexblockcomment(const char *start)
 {
     const char *scan = start + 2;
     tok.filename = file->filename;
     tok.linenum = file->linenum;
     /* Hunt for the closing star-slash pair. */
     for (;; scan++) {
         if (!*scan)
             lexerrorexit("unterminated block comment");
         if (*scan == '\n') {
             file->linenum++;
         } else if (*scan == '*') {
             if (!scan[1])
                 lexerrorexit("unterminated block comment");
             if (scan[1] == '/')
                 break;
         }
     }
     /* Step over the closing delimiter. */
     scan += 2;
     file->pos = scan;
     tok.type = TOK_BLOCKCOMMENT;
     tok.start = start + 2;
     /* Both two-character delimiters are left out of the length. */
     tok.len = scan - start - 4;
     return &tok;
 }

 /***********************************************************************
  * lexinlinecomment : lex an inline comment
  *
  * Enter:   start = start of comment, starts with "//"
  *
  * Return:  tok struct, lifetime until next call to lex
  *
  * The token text starts after the "//" and includes the newline that
  * ends the comment. When the comment is ended by the 0 byte that
  * terminates the buffer instead, that byte is not consumed, so the read
  * position never moves past the end of the buffer.
  */
 static struct tok *
 lexinlinecomment(const char *start)
 {
     const char *p = start + 2;
     int sawnewline;
     while (*p && *p != '\n')
         p++;
     sawnewline = *p == '\n';
     /* Only a real newline is swallowed; the terminating 0 must stay in
      * front of the read position for lex to detect the end of the file. */
     if (sawnewline)
         p++;
     file->pos = p;
     tok.type = TOK_INLINECOMMENT;
     tok.start = start + 2;
     tok.len = p - start - 2;
     tok.filename = file->filename;
     tok.linenum = file->linenum;
     if (sawnewline)
         file->linenum++;
     return &tok;
 }

 /***********************************************************************
  * lexnumber : lex a number (or just a '-' symbol)
  *
  * Enter:   start = start of token
  *
  * Return:  tok struct, lifetime until next call to lex
  *
  * The IDL grammar seems to say that a float can't start with a
  * decimal point, so that's what we have implemented here.
  *
  * The result is "-Infinity", a lone '-', an integer (decimal, octal or
  * hex) or a float. An exponent only counts once a digit follows the
  * 'e' and its optional '-'; otherwise the token stops before the 'e'.
  */
 static struct tok *
 lexnumber(const char *start)
 {
     static const char infinity[] = "-Infinity";
     const char *p = start;
     const char *octalend = start;
     int ch = *p;
     enum { STATE_START, STATE_INT, STATE_HEX, STATE_OCTAL, STATE_BADOCTAL,
             STATE_DP, STATE_EXPSTART, STATE_EXPSIGN, STATE_EXP
             } state = STATE_START;
     if (ch == '-') {
         ch = *++p;
         if (ch == 'I') { /* possible start of Infinity */
             unsigned int len = sizeof infinity - 1;
             /* strncmp stops at the 0 byte ending the buffer, so a short
              * tail such as "-I" at the end of input is not read past. */
             if (!strncmp(start, infinity, len)) {
                 tok.type = TOK_minusinfinity;
                 tok.start = start;
                 tok.len = len;
                 tok.filename = file->filename;
                 tok.linenum = file->linenum;
                 file->pos = start + len;
                 return &tok;
             }
         }
     }
     if (ch == '0') {
         state = STATE_OCTAL;
         ch = *++p;
         if ((ch & ~0x20) == 'X') {
             state = STATE_HEX;
             ch = *++p;
         }
     }

     for (;;) {
         /* Remember whether the character about to be consumed is a
          * decimal digit; only digits complete an integer or exponent. */
         int digit = (unsigned)(ch - '0') < 10;
         if ((unsigned)(ch - '0') >= 8) {
             if ((ch & -2) == '8') {
                 /* An 8 or 9 spoils an octal number; note where it
                  * stopped being valid. */
                 if (state == STATE_OCTAL) {
                     state = STATE_BADOCTAL;
                     octalend = p;
                 }
             } else if ((unsigned)((ch & ~0x20) - 'A') <= 'F' - 'A') {
                 /* A to F: hex digits in a hex number, otherwise only
                  * 'e' is acceptable, as the exponent marker. */
                 if (state != STATE_HEX) {
                     if ((ch & ~0x20) != 'E')
                         break;
                     if (state >= STATE_EXPSTART || state == STATE_START)
                         break;
                     state = STATE_EXPSTART;
                 }
             } else if (ch == '.') {
                 if (state == STATE_HEX || state >= STATE_DP)
                     break;
                 state = STATE_DP;
             } else if (ch == '-') {
                 /* A sign is only allowed straight after the 'e'. */
                 if (state != STATE_EXPSTART)
                     break;
                 state = STATE_EXPSIGN;
             } else
                 break;
         }
         ch = *++p;
         /* Promote the state only after a digit. Doing it after every
          * character turned the 'e' itself into a complete exponent, so
          * "1e-5" was cut after "1e" and the back-off cases below could
          * never be reached. */
         if (digit) {
             if (state == STATE_START)
                 state = STATE_INT;
             else if (state == STATE_EXPSTART || state == STATE_EXPSIGN)
                 state = STATE_EXP;
         }
     }
     switch (state) {
     case STATE_START:
         /* Must have just been a - character by itself. */
         tok.type = '-';
         p = start + 1;
         break;
     case STATE_BADOCTAL:
         p = octalend;
         /* fall through... */
     case STATE_INT:
     case STATE_OCTAL:
         tok.type = TOK_INTEGER;
         break;
     case STATE_HEX:
         /* "0x" or "-0x" without any hex digit: give back the 'x' only,
          * leaving "0" or "-0" as the integer. */
         if (p - start == 2 || (p - start == 3 && *start == '-'))
             p--;
         tok.type = TOK_INTEGER;
         break;
     case STATE_EXP:
     case STATE_DP:
         tok.type = TOK_FLOAT;
         break;
     case STATE_EXPSIGN:
         /* Give back the '-' of an exponent that has no digits. */
         p--;
         /* fall through... */
     case STATE_EXPSTART:
         /* Give back the 'e' of an exponent that has no digits. */
         p--;
         tok.type = TOK_FLOAT;
         break;
     }
     tok.start = start;
     tok.len = p - start;
     tok.filename = file->filename;
     tok.linenum = file->linenum;
     file->pos = p;
     return &tok;
 }

 /***********************************************************************
  * lexstring : lex a quoted string
  *
  * Enter:   start = start of token (the opening quote)
  *
  * Return:  tok struct, lifetime until next call to lex
  *
  * The token text is what lies between the quotes. Reaching a newline
  * or the end of the input before the closing quote is an error.
  */
 static struct tok *
 lexstring(const char *start)
 {
     const char *p = start + 1;
     /* Note the IDL spec doesn't seem to allow for escape sequences
      * in strings, so the first quote met closes the string. */
     while (*p != '"') {
         if (!*p || *p == '\n')
             lexerrorexit("unterminated string");
         p++;
     }
     tok.type = TOK_STRING;
     tok.start = start + 1;
     tok.len = p - start - 1;
     tok.filename = file->filename;
     tok.linenum = file->linenum;
     file->pos = p + 1;
     return &tok;
 }

 /***********************************************************************
  * lexidentifier : lex an identifier
  *
  * Enter:   start = start of token
  *
  * Return:  tok struct, lifetime until next call to lex
  *
  * An identifier whose text equals an entry of the keyword table gets
  * that keyword's token type; the types are counted up from
  * TOK_DOMString in table order.
  */
 static struct tok *
 lexidentifier(const char *start)
 {
     const char *end = start + 1;
     const char *kw;
     unsigned int type = TOK_DOMString;
     unsigned int len;
     /* Extend over every following letter, digit or underscore. */
     while (*end == '_' || (unsigned)(*end - '0') < 10
             || (unsigned)((*end & ~0x20) - 'A') <= 'Z' - 'A')
     {
         end++;
     }
     tok.type = TOK_IDENTIFIER;
     tok.start = start;
     tok.len = end - start;
     tok.filename = file->filename;
     tok.linenum = file->linenum;
     file->pos = end;
     /* See if this is a keyword: step through the table one string at a
      * time until the empty string that ends it. */
     for (kw = keywords; (len = strlen(kw)) != 0; kw += len + 1, type++) {
         if (len == tok.len && !memcmp(start, kw, len)) {
             tok.type = type;
             break;
         }
     }
     return &tok;
 }

 /***********************************************************************
  * lex : retrieve next token
  *
  * Return:  tok struct, lifetime until next call to lex
  *
  * Whitespace is skipped first, moving on to the next input file when
  * the current one is exhausted; once no file is left a TOK_EOF token
  * is returned. Comments, numbers, strings and identifiers are handed to
  * their own functions; "..." and "[]" are recognised here, and any other
  * character becomes a token whose type is the character itself.
  */
 struct tok *
 lex(void)
 {
     const char *p;
     int ch;
     for (;;) {
         if (!file) {
             tok.type = TOK_EOF;
             tok.start = "end of file";
             tok.len = strlen(tok.start);
             return &tok;
         }
         tok.prestart = p = file->pos;
         /* Flush whitespace. Each newline bumps the line count and moves
          * prestart to the start of the following line. */
         for (;;) {
             ch = *p++;
             switch (ch) {
             case ' ':
             case '\t':
             case '\r':
                 continue;
             case '\n':
                 ++file->linenum;
                 tok.prestart = p;
                 continue;
             }
             break;
         }
         p--;
         if (ch)
             break;
         /* A 0 byte is only accepted as the terminator at the end of the
          * buffer, in which case lexing carries on with the next file. */
         if (p != file->end)
             lexerrorexit("\\0 byte not allowed");
         file = file->next;
     }
     tok.start = p;
     if (ch == '/') {
         /* See if we have a comment. Only peek at the following
          * character: consuming it here made a lone '/' token swallow
          * whatever came after it. */
         switch (p[1]) {
         case '*':
             return lexblockcomment(p);
         case '/':
             return lexinlinecomment(p);
         }
     } else {
         /* Handle things that start with '-', which is either '-' as a token,
          * or a number. Handle numbers. */
         if (ch == '-' || (unsigned)(ch - '0') < 10)
             return lexnumber(p);
         /* Handle string. */
         if (ch == '"')
             return lexstring(p);
         /* Handle identifier. */
         if (ch == '_' || (unsigned)((ch & ~0x20) - 'A') <= 'Z' - 'A')
             return lexidentifier(p);
         /* The only multi-symbol token are ... and [] */
         if (ch == '.') {
             tok.type = '.';
             if (*++p == '.' && p[1] == '.') {
                 tok.type = TOK_ELLIPSIS;
                 p += 2;
             }
             goto done;
         }
         if (ch == '[') {
             tok.type = '[';
             if (*++p == ']') {
                 tok.type = TOK_DOUBLEBRACKET;
                 p++;
             }
             goto done;
         }
     }
     /* Single symbol token. */
     tok.type = ch;
     p++;
 done:
     tok.filename = file->filename;
     tok.linenum = file->linenum;
     tok.len = p - tok.start;
     file->pos = p;
     return &tok;
 }

 /***********************************************************************
  * outputwidl : output literal Web IDL input that node was parsed from
  *
  * Enter:   node = parse node to output literal Web IDL for
  *
  * The text from node->wsstart to node->end is written with printtext,
  * leaving out comments. The text of each node met in the tree walk that
  * has its start set is wrapped in a <ref> element on stdout.
  */
 void
 outputwidl(struct node *node)
 {
     const char *start = node->wsstart, *end = node->end;
     /* Find the file that start is in. */
     struct file *file = firstfile;
     while (start < file->buf || start >= file->end) {
         file = file->next;
         assert(file);
     }
     /* Find the (current or) next node that has node->start set. Any such
      * node needs to be put inside a <ref> element. */
     while (node && !node->start)
         node = nodewalk(node);
     /* Output until we get to the end. This has to cope with the text
      * spanning multiple input files. */
     for (;;) {
         int final = end >= file->buf && end <= file->end;
         const char *thisend = final ? end : file->end;
         /* Output the Web IDL, omitting comments. */
         while (start != end) {
             const char *p, *p2, *comment, *endcomment;
             int ch;
             if (node && start == node->start) {
                 /* We are on the start of the present node in the tree
                  * walk. Put it in a <ref>. */
                 fputs("<ref>", stdout);
                 printtext(node->start, node->end - node->start, 1);
                 fputs("</ref>", stdout);
                 start = node->end;
                 /* Skip to the next node with node->start set if any. */
                 do
                     node = nodewalk(node);
                 while (node && !node->start);
                 continue;
             }
             /* p2 is how far plain text may run: up to the next node to
              * be wrapped if it lies in this file, else up to thisend. */
             p2 = thisend;
             if (node && node->start >= file->buf && node->start < p2)
                 p2 = node->start;
             p = memchr(start, '/', p2 - start);
             if (!p) {
                 printtext(start, p2 - start, 1);
                 if (p2 != thisend) {
                     start = p2;
                     continue;
                 }
                 break;
             }
             /* See if we're at the start of a comment. If so find the end. */
             comment = 0;
             if (p + 1 != thisend) {
                 switch (p[1]) {
                 case '*':
                     /* Block comment. */
                     comment = p;
                     p++;
                     /* memchr gives 0 when no '*' is left before thisend;
                      * that result must not be dereferenced, and the
                      * comment is then taken to run up to thisend. */
                     do
                         p = memchr(p + 1, '*', thisend - p - 1);
                     while (p && p[1] != '/');
                     endcomment = p ? p + 2 : thisend;
                     break;
                 case '/':
                     /* Inline comment. */
                     comment = p;
                     p = memchr(p, '\n', thisend - p);
                     if (!p)
                         p = thisend;
                     endcomment = p;
                     break;
                 }
             }
             if (!comment) {
                 /* Not at start of comment. */
                 p++;
                 printtext(start, p - start, 1);
                 start = p;
                 assert(start <= end);
                 continue;
             }
             /* If the comment has only whitespace before it on the line,
              * eat that up. */
             p = comment;
             while (p != start && ((ch = p[-1]) == ' ' || ch == '\t'))
                 p--;
             if (p == start || p[-1] == '\n') {
                 comment = p;
                 /* If the comment has only whitespace after it to the end
                  * of the line, eat that and the newline up. This always
                  * happens for an inline comment on a line by itself. */
                 p = endcomment;
                 while (p != thisend && ((ch = *p) == ' ' || ch == '\t'))
                     p++;
                 if (p != thisend && *p == '\n')
                     p++;
                 endcomment = p;
             }
             printtext(start, comment - start, 1);
             start = endcomment;
             /* A comment's closing delimiter may straddle thisend. */
             if (start > thisend)
                 start = thisend;
         }
         if (final)
             break;
         file = file->next;
         assert(file);
         start = file->buf;
     }
 }
	/***********************************************************************
	* $Id$
	* Copyright 2009 Aplix Corporation. All rights reserved.
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	* http://www.apache.org/licenses/LICENSE-2.0
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	***********************************************************************/
	#include <assert.h>
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include "lex.h"
	#include "misc.h"
	#include "node.h"
	#include "process.h"

	/* One input file held entirely in memory. The files are chained into a
	 * singly linked list in the order readinput was given them. */
	struct file {
	    struct file *next;      /* following input file, or 0 for the last */
	    const char *filename;   /* name reported with tokens and errors */
	    char *buf;              /* file contents followed by a 0 byte */
	    const char *pos;        /* where the lexer reads next */
	    const char *end;        /* address of the 0 byte that follows the contents */
	    unsigned int linenum;   /* line number at pos, starting from 1 */
	};

	/* Keyword table built from the KEYWORDS macro. lexidentifier walks it as
	 * consecutive 0-terminated strings ended by an empty string. */
	const char keywords[] = KEYWORDS;

	/* Head of the list of input files. */
	static struct file *firstfile;
	/* File currently being lexed; 0 once every file has been consumed. */
	static struct file *file;
	/* The single token object that the lex functions fill in and return. */
	static struct tok tok;

	/***********************************************************************
	 * readinput : read all input files into memory
	 *
	 * Enter:   argv = 0-terminated array of filenames; "-" selects stdin
	 *
	 * Each file is read whole into a buffer that ends with a 0 byte, and a
	 * struct file for it is appended to the list of inputs. A file that
	 * cannot be opened or read is reported through errorexit.
	 */
	void
	readinput(const char *const *argv)
	{
	    struct file **link = &file;
	    const char *filename;
	    while ((filename = *argv++) != 0) {
	        struct file *entry;
	        char *buf = 0;
	        /* Byte counts are size_t, matching fread, so that doubling the
	         * chunk size cannot overflow an int on very large inputs. */
	        size_t len = 0;
	        FILE *handle;
	        int isstdin = !strcmp(filename, "-");
	        /* Read the file. */
	        if (isstdin) {
	            handle = stdin;
	            filename = "<stdin>";
	        } else {
	            handle = fopen(filename, "rb");
	            if (!handle)
	                errorexit("%s: %s", filename, strerror(errno));
	        }
	        for (;;) {
	            size_t want = len ? len * 2 : 4096;
	            size_t got;
	            /* One spare byte is kept for the terminating 0. */
	            buf = memrealloc(buf, len + want + 1);
	            got = fread(buf + len, 1, want, handle);
	            if (!got)
	                break;
	            len += got;
	        }
	        if (ferror(handle))
	            errorexit("%s: I/O error", filename);
	        if (!isstdin)
	            fclose(handle);
	        buf[len] = 0;
	        /* Give back the unused tail of the buffer. */
	        buf = memrealloc(buf, len + 1);
	        /* Create the file struct for it and hook it onto the list. */
	        entry = memalloc(sizeof *entry);
	        *link = entry;
	        link = &entry->next;
	        entry->filename = filename;
	        entry->pos = entry->buf = buf;
	        entry->end = buf + len;
	        entry->linenum = 1;
	    }
	    *link = 0;
	    firstfile = file;
	}

	/***********************************************************************
	 * lexerrorexit : report an error at the lexer's current position
	 *
	 * Enter:   format = message format string
	 *          ... = arguments that go with format
	 *
	 * The filename and line number of the file being lexed are handed to
	 * vlocerrorexit together with the format and its arguments.
	 */
	static void
	lexerrorexit(const char *format, ...)
	{
	    va_list args;
	    va_start(args, format);
	    vlocerrorexit(file->filename, file->linenum, format, args);
	    va_end(args);
	}

	/***********************************************************************
	 * lexblockcomment : lex a block comment
	 *
	 * Enter:   start = start of comment (its opening slash)
	 *
	 * Return:  tok struct, lifetime until next call to lex
	 *
	 * The token covers the text between the two delimiters and carries the
	 * line number on which the comment opens. Newlines inside the comment
	 * advance the file's line count.
	 */
	static struct tok *
	lexblockcomment(const char *start)
	{
	    const char *scan = start + 2;
	    tok.filename = file->filename;
	    tok.linenum = file->linenum;
	    /* Hunt for the closing star-slash pair. */
	    for (;; scan++) {
	        if (!*scan)
	            lexerrorexit("unterminated block comment");
	        if (*scan == '\n') {
	            file->linenum++;
	        } else if (*scan == '*') {
	            if (!scan[1])
	                lexerrorexit("unterminated block comment");
	            if (scan[1] == '/')
	                break;
	        }
	    }
	    /* Step over the closing delimiter. */
	    scan += 2;
	    file->pos = scan;
	    tok.type = TOK_BLOCKCOMMENT;
	    tok.start = start + 2;
	    /* Both two-character delimiters are left out of the length. */
	    tok.len = scan - start - 4;
	    return &tok;
	}

	/***********************************************************************
	 * lexinlinecomment : lex an inline comment
	 *
	 * Enter:   start = start of comment, starts with "//"
	 *
	 * Return:  tok struct, lifetime until next call to lex
	 *
	 * The token text starts after the "//" and includes the newline that
	 * ends the comment. When the comment is ended by the 0 byte that
	 * terminates the buffer instead, that byte is not consumed, so the read
	 * position never moves past the end of the buffer.
	 */
	static struct tok *
	lexinlinecomment(const char *start)
	{
	    const char *p = start + 2;
	    int sawnewline;
	    while (*p && *p != '\n')
	        p++;
	    sawnewline = *p == '\n';
	    /* Only a real newline is swallowed; the terminating 0 must stay in
	     * front of the read position for lex to detect the end of the file. */
	    if (sawnewline)
	        p++;
	    file->pos = p;
	    tok.type = TOK_INLINECOMMENT;
	    tok.start = start + 2;
	    tok.len = p - start - 2;
	    tok.filename = file->filename;
	    tok.linenum = file->linenum;
	    if (sawnewline)
	        file->linenum++;
	    return &tok;
	}

	/***********************************************************************
	 * lexnumber : lex a number (or just a '-' symbol)
	 *
	 * Enter:   start = start of token
	 *
	 * Return:  tok struct, lifetime until next call to lex
	 *
	 * The IDL grammar seems to say that a float can't start with a
	 * decimal point, so that's what we have implemented here.
	 *
	 * The result is "-Infinity", a lone '-', an integer (decimal, octal or
	 * hex) or a float. An exponent only counts once a digit follows the
	 * 'e' and its optional '-'; otherwise the token stops before the 'e'.
	 */
	static struct tok *
	lexnumber(const char *start)
	{
	    static const char infinity[] = "-Infinity";
	    const char *p = start;
	    const char *octalend = start;
	    int ch = *p;
	    enum { STATE_START, STATE_INT, STATE_HEX, STATE_OCTAL, STATE_BADOCTAL,
	            STATE_DP, STATE_EXPSTART, STATE_EXPSIGN, STATE_EXP
	            } state = STATE_START;
	    if (ch == '-') {
	        ch = *++p;
	        if (ch == 'I') { /* possible start of Infinity */
	            unsigned int len = sizeof infinity - 1;
	            /* strncmp stops at the 0 byte ending the buffer, so a short
	             * tail such as "-I" at the end of input is not read past. */
	            if (!strncmp(start, infinity, len)) {
	                tok.type = TOK_minusinfinity;
	                tok.start = start;
	                tok.len = len;
	                tok.filename = file->filename;
	                tok.linenum = file->linenum;
	                file->pos = start + len;
	                return &tok;
	            }
	        }
	    }
	    if (ch == '0') {
	        state = STATE_OCTAL;
	        ch = *++p;
	        if ((ch & ~0x20) == 'X') {
	            state = STATE_HEX;
	            ch = *++p;
	        }
	    }

	    for (;;) {
	        /* Remember whether the character about to be consumed is a
	         * decimal digit; only digits complete an integer or exponent. */
	        int digit = (unsigned)(ch - '0') < 10;
	        if ((unsigned)(ch - '0') >= 8) {
	            if ((ch & -2) == '8') {
	                /* An 8 or 9 spoils an octal number; note where it
	                 * stopped being valid. */
	                if (state == STATE_OCTAL) {
	                    state = STATE_BADOCTAL;
	                    octalend = p;
	                }
	            } else if ((unsigned)((ch & ~0x20) - 'A') <= 'F' - 'A') {
	                /* A to F: hex digits in a hex number, otherwise only
	                 * 'e' is acceptable, as the exponent marker. */
	                if (state != STATE_HEX) {
	                    if ((ch & ~0x20) != 'E')
	                        break;
	                    if (state >= STATE_EXPSTART || state == STATE_START)
	                        break;
	                    state = STATE_EXPSTART;
	                }
	            } else if (ch == '.') {
	                if (state == STATE_HEX || state >= STATE_DP)
	                    break;
	                state = STATE_DP;
	            } else if (ch == '-') {
	                /* A sign is only allowed straight after the 'e'. */
	                if (state != STATE_EXPSTART)
	                    break;
	                state = STATE_EXPSIGN;
	            } else
	                break;
	        }
	        ch = *++p;
	        /* Promote the state only after a digit. Doing it after every
	         * character turned the 'e' itself into a complete exponent, so
	         * "1e-5" was cut after "1e" and the back-off cases below could
	         * never be reached. */
	        if (digit) {
	            if (state == STATE_START)
	                state = STATE_INT;
	            else if (state == STATE_EXPSTART || state == STATE_EXPSIGN)
	                state = STATE_EXP;
	        }
	    }
	    switch (state) {
	    case STATE_START:
	        /* Must have just been a - character by itself. */
	        tok.type = '-';
	        p = start + 1;
	        break;
	    case STATE_BADOCTAL:
	        p = octalend;
	        /* fall through... */
	    case STATE_INT:
	    case STATE_OCTAL:
	        tok.type = TOK_INTEGER;
	        break;
	    case STATE_HEX:
	        /* "0x" or "-0x" without any hex digit: give back the 'x' only,
	         * leaving "0" or "-0" as the integer. */
	        if (p - start == 2 || (p - start == 3 && *start == '-'))
	            p--;
	        tok.type = TOK_INTEGER;
	        break;
	    case STATE_EXP:
	    case STATE_DP:
	        tok.type = TOK_FLOAT;
	        break;
	    case STATE_EXPSIGN:
	        /* Give back the '-' of an exponent that has no digits. */
	        p--;
	        /* fall through... */
	    case STATE_EXPSTART:
	        /* Give back the 'e' of an exponent that has no digits. */
	        p--;
	        tok.type = TOK_FLOAT;
	        break;
	    }
	    tok.start = start;
	    tok.len = p - start;
	    tok.filename = file->filename;
	    tok.linenum = file->linenum;
	    file->pos = p;
	    return &tok;
	}

	/***********************************************************************
	 * lexstring : lex a quoted string
	 *
	 * Enter:   start = start of token (the opening quote)
	 *
	 * Return:  tok struct, lifetime until next call to lex
	 *
	 * The token text is what lies between the quotes. Reaching a newline
	 * or the end of the input before the closing quote is an error.
	 */
	static struct tok *
	lexstring(const char *start)
	{
	    const char *p = start + 1;
	    /* Note the IDL spec doesn't seem to allow for escape sequences
	     * in strings, so the first quote met closes the string. */
	    while (*p != '"') {
	        if (!*p || *p == '\n')
	            lexerrorexit("unterminated string");
	        p++;
	    }
	    tok.type = TOK_STRING;
	    tok.start = start + 1;
	    tok.len = p - start - 1;
	    tok.filename = file->filename;
	    tok.linenum = file->linenum;
	    file->pos = p + 1;
	    return &tok;
	}

	/***********************************************************************
	 * lexidentifier : lex an identifier
	 *
	 * Enter:   start = start of token
	 *
	 * Return:  tok struct, lifetime until next call to lex
	 *
	 * An identifier whose text equals an entry of the keyword table gets
	 * that keyword's token type; the types are counted up from
	 * TOK_DOMString in table order.
	 */
	static struct tok *
	lexidentifier(const char *start)
	{
	    const char *end = start + 1;
	    const char *kw;
	    unsigned int type = TOK_DOMString;
	    unsigned int len;
	    /* Extend over every following letter, digit or underscore. */
	    while (*end == '_' || (unsigned)(*end - '0') < 10
	            || (unsigned)((*end & ~0x20) - 'A') <= 'Z' - 'A')
	    {
	        end++;
	    }
	    tok.type = TOK_IDENTIFIER;
	    tok.start = start;
	    tok.len = end - start;
	    tok.filename = file->filename;
	    tok.linenum = file->linenum;
	    file->pos = end;
	    /* See if this is a keyword: step through the table one string at a
	     * time until the empty string that ends it. */
	    for (kw = keywords; (len = strlen(kw)) != 0; kw += len + 1, type++) {
	        if (len == tok.len && !memcmp(start, kw, len)) {
	            tok.type = type;
	            break;
	        }
	    }
	    return &tok;
	}

	/***********************************************************************
	 * lex : retrieve next token
	 *
	 * Return:  tok struct, lifetime until next call to lex
	 *
	 * Whitespace is skipped first, moving on to the next input file when
	 * the current one is exhausted; once no file is left a TOK_EOF token
	 * is returned. Comments, numbers, strings and identifiers are handed to
	 * their own functions; "..." and "[]" are recognised here, and any other
	 * character becomes a token whose type is the character itself.
	 */
	struct tok *
	lex(void)
	{
	    const char *p;
	    int ch;
	    for (;;) {
	        if (!file) {
	            tok.type = TOK_EOF;
	            tok.start = "end of file";
	            tok.len = strlen(tok.start);
	            return &tok;
	        }
	        tok.prestart = p = file->pos;
	        /* Flush whitespace. Each newline bumps the line count and moves
	         * prestart to the start of the following line. */
	        for (;;) {
	            ch = *p++;
	            switch (ch) {
	            case ' ':
	            case '\t':
	            case '\r':
	                continue;
	            case '\n':
	                ++file->linenum;
	                tok.prestart = p;
	                continue;
	            }
	            break;
	        }
	        p--;
	        if (ch)
	            break;
	        /* A 0 byte is only accepted as the terminator at the end of the
	         * buffer, in which case lexing carries on with the next file. */
	        if (p != file->end)
	            lexerrorexit("\\0 byte not allowed");
	        file = file->next;
	    }
	    tok.start = p;
	    if (ch == '/') {
	        /* See if we have a comment. Only peek at the following
	         * character: consuming it here made a lone '/' token swallow
	         * whatever came after it. */
	        switch (p[1]) {
	        case '*':
	            return lexblockcomment(p);
	        case '/':
	            return lexinlinecomment(p);
	        }
	    } else {
	        /* Handle things that start with '-', which is either '-' as a token,
	         * or a number. Handle numbers. */
	        if (ch == '-' || (unsigned)(ch - '0') < 10)
	            return lexnumber(p);
	        /* Handle string. */
	        if (ch == '"')
	            return lexstring(p);
	        /* Handle identifier. */
	        if (ch == '_' || (unsigned)((ch & ~0x20) - 'A') <= 'Z' - 'A')
	            return lexidentifier(p);
	        /* The only multi-symbol token are ... and [] */
	        if (ch == '.') {
	            tok.type = '.';
	            if (*++p == '.' && p[1] == '.') {
	                tok.type = TOK_ELLIPSIS;
	                p += 2;
	            }
	            goto done;
	        }
	        if (ch == '[') {
	            tok.type = '[';
	            if (*++p == ']') {
	                tok.type = TOK_DOUBLEBRACKET;
	                p++;
	            }
	            goto done;
	        }
	    }
	    /* Single symbol token. */
	    tok.type = ch;
	    p++;
	done:
	    tok.filename = file->filename;
	    tok.linenum = file->linenum;
	    tok.len = p - tok.start;
	    file->pos = p;
	    return &tok;
	}

	/***********************************************************************
	 * outputwidl : output literal Web IDL input that node was parsed from
	 *
	 * Enter:   node = parse node to output literal Web IDL for
	 *
	 * The text from node->wsstart to node->end is written with printtext,
	 * leaving out comments. The text of each node met in the tree walk that
	 * has its start set is wrapped in a <ref> element on stdout.
	 */
	void
	outputwidl(struct node *node)
	{
	    const char *start = node->wsstart, *end = node->end;
	    /* Find the file that start is in. */
	    struct file *file = firstfile;
	    while (start < file->buf || start >= file->end) {
	        file = file->next;
	        assert(file);
	    }
	    /* Find the (current or) next node that has node->start set. Any such
	     * node needs to be put inside a <ref> element. */
	    while (node && !node->start)
	        node = nodewalk(node);
	    /* Output until we get to the end. This has to cope with the text
	     * spanning multiple input files. */
	    for (;;) {
	        int final = end >= file->buf && end <= file->end;
	        const char *thisend = final ? end : file->end;
	        /* Output the Web IDL, omitting comments. */
	        while (start != end) {
	            const char *p, *p2, *comment, *endcomment;
	            int ch;
	            if (node && start == node->start) {
	                /* We are on the start of the present node in the tree
	                 * walk. Put it in a <ref>. */
	                fputs("<ref>", stdout);
	                printtext(node->start, node->end - node->start, 1);
	                fputs("</ref>", stdout);
	                start = node->end;
	                /* Skip to the next node with node->start set if any. */
	                do
	                    node = nodewalk(node);
	                while (node && !node->start);
	                continue;
	            }
	            /* p2 is how far plain text may run: up to the next node to
	             * be wrapped if it lies in this file, else up to thisend. */
	            p2 = thisend;
	            if (node && node->start >= file->buf && node->start < p2)
	                p2 = node->start;
	            p = memchr(start, '/', p2 - start);
	            if (!p) {
	                printtext(start, p2 - start, 1);
	                if (p2 != thisend) {
	                    start = p2;
	                    continue;
	                }
	                break;
	            }
	            /* See if we're at the start of a comment. If so find the end. */
	            comment = 0;
	            if (p + 1 != thisend) {
	                switch (p[1]) {
	                case '*':
	                    /* Block comment. */
	                    comment = p;
	                    p++;
	                    /* memchr gives 0 when no '*' is left before thisend;
	                     * that result must not be dereferenced, and the
	                     * comment is then taken to run up to thisend. */
	                    do
	                        p = memchr(p + 1, '*', thisend - p - 1);
	                    while (p && p[1] != '/');
	                    endcomment = p ? p + 2 : thisend;
	                    break;
	                case '/':
	                    /* Inline comment. */
	                    comment = p;
	                    p = memchr(p, '\n', thisend - p);
	                    if (!p)
	                        p = thisend;
	                    endcomment = p;
	                    break;
	                }
	            }
	            if (!comment) {
	                /* Not at start of comment. */
	                p++;
	                printtext(start, p - start, 1);
	                start = p;
	                assert(start <= end);
	                continue;
	            }
	            /* If the comment has only whitespace before it on the line,
	             * eat that up. */
	            p = comment;
	            while (p != start && ((ch = p[-1]) == ' ' || ch == '\t'))
	                p--;
	            if (p == start || p[-1] == '\n') {
	                comment = p;
	                /* If the comment has only whitespace after it to the end
	                 * of the line, eat that and the newline up. This always
	                 * happens for an inline comment on a line by itself. */
	                p = endcomment;
	                while (p != thisend && ((ch = *p) == ' ' || ch == '\t'))
	                    p++;
	                if (p != thisend && *p == '\n')
	                    p++;
	                endcomment = p;
	            }
	            printtext(start, comment - start, 1);
	            start = endcomment;
	            /* A comment's closing delimiter may straddle thisend. */
	            if (start > thisend)
	                start = thisend;
	        }
	        if (final)
	            break;
	        file = file->next;
	        assert(file);
	        start = file->buf;
	    }
	}