/*
 * clex.aikido
 *
 * Aikido Language System,
 * export version: 3.02
 * Copyright (c) 2002 Sun Microsystems, Inc.
 *
 * Sun Public License Notice
 *
 * The contents of this file are subject to the Sun Public License Version 1.0 (the "License"). You
 * may not use this file except in compliance with the License. A copy of the License is available
 * at http://www.opensource.org/licenses/sunpublic.php
 *
 * The Original Code is Aikido.
 * The Initial Developer of the Original Code is David Allison on behalf of Sun Microsystems, Inc.
 * Copyright (C) Sun Microsystems, Inc. 2000-2003. All Rights Reserved.
 *
 *
 * Contributor(s): dallison
 *
 * Version: 1.3
 * Created by dallison on 4/19/2002
 * Last modified by dallison on 03/07/29
 */

// lexical analyser
// like lex.aikido except handles C language features like
// multibyte character constants

// @(#)clex.aikido 1.3 03/07/29

import ctype

// list of tokens recognized
enum Tokens {
    BAD, EOL, NUMBER, IDENTIFIER, STRING, CHAR, FNUMBER, LONGSTRING
}

//
// Lexical analyser over a vector of strings.
//
//   lines         a single string or a vector of strings to be scanned; anything
//                 that is not a vector is wrapped into a one-element vector
//   originalfile  file name reported in 'filename' until a '#' line names another
//
// Throws "Only works with strings" if any element of lines is not a string.
//
// NOTE(review): the end-of-line tests below treat the final character of a line
// as its terminator, so every line is presumably expected to end with a
// newline -- confirm against the callers.
//
class Lex (generic lines, originalfile) {
    if (typeof (lines) != "vector") {
        lines = [lines]                 // make into a vector
    }

    // must have a vector of strings
    foreach line lines {
        if (typeof (line) != "string") {
            throw "Only works with strings"
        }
    }

    public var line = ""                // current line
    public var currentToken = BAD       // current token
    public var spelling = ""            // current identifier or string spelling
    public generic number = 0           // current number or character
    private var reservedWords = {}      // empty map of reserved words
    private var tokens = []             // top level tokens
    public var ch = 0                   // current position
    public var lineno = 0               // display line number
    public var filename = originalfile  // display file name
    private var index = 0               // index into lines (real line no)
    public var postfix = ""             // postfix for numbers

    //
    // register word so that an identifier with that spelling is reported as tok
    //
    public function addReservedWord (word, tok) {
        reservedWords += {word = tok}
    }

    //
    // One node of the tree of punctuation tokens.  A node holds one character
    // (c), the token that ends at this character (val, BAD if none does) and
    // the enclosing node (par).
    //
    private class TokenNode (c, val, par) {
        var parent = par
        var char = c
        var value = val
        var children = []

    public:
        //
        // add a child node to the tree
        //
        // Returns false when s[index] is not this node's character, true once
        // tok has been stored.  Throws "duplicate token" if the spelling s
        // already has a token.
        //
        function add (s, index, tok) {
            if (s[index] != char) {
                return false
            }
            if (index == sizeof (s) - 1) {
                if (value == BAD) {
                    value = tok
                    return true
                } else {
                    throw "duplicate token"
                }
            }
            index++
            foreach child children {
                if (child.add (s, index, tok)) {
                    return true
                }
            }
            // no existing child starts with the next character: grow the tree
            var node = new TokenNode (s[index], BAD, this)
            children += node
            return node.add (s, index, tok)
        }

        //
        // find the token whose first character is in line[ch].  Advance ch
        // over the characters of the token that is returned.
        //
        // Returns BAD, leaving ch untouched, when nothing matches from here.
        //
        function find {
            if (ch >= sizeof (line) || line[ch] != char) {
                return BAD
            }
            ch++
            foreach child children {
                var tok = child.find()
                if (tok != BAD) {
                    return tok
                }
            }
            if (value == BAD) {
                // This node is only a prefix of longer tokens and none of them
                // matched: give the character back so that the caller can
                // fall back to a shorter token without losing input.
                ch--
            }
            return value
        }
    }

public:
    //
    // register the spelling s as the token tok
    //
    function addToken (s, tok) {
        foreach token tokens {
            if (token.add (s, 0, tok)) {
                return
            }
        }
        // first token beginning with this character: start a new tree
        var node = new TokenNode (s[0], BAD, null)
        tokens += node
        node.add (s, 0, tok)
    }

    //
    // advance ch over white space in the current line
    //
    function skipSpaces {
        while (ch < sizeof (line) && ctype.isspace (line[ch])) {
            ch++
        }
    }

    //
    // go back to the first line (the current line text and token are kept)
    //
    public function reset {
        lineno = 0
        index = 0
        ch = 0
    }

    //
    // read the next line
    //
    // Empty lines are always skipped.  When skipblank is true, lines holding
    // only white space and lines starting with '//' are skipped as well, and a
    // line starting with '#' updates lineno and filename from its second and
    // third space-separated fields.  The '#' line itself is still made the
    // current line.
    //
    public function readLine (skipblank = true) {
        while (index < sizeof (lines)) {
            line = lines[index++]
            lineno++
            ch = 0
            if (sizeof (line) == 0) {
                continue
            }
            if (!skipblank) {
                break
            }
            skipSpaces()
            // '>=' rather than '==': skipSpaces may run to the very end of the line
            if (ch >= sizeof (line) - 1) {
                continue
            }
            // from here on both line[ch] and line[ch+1] exist
            if (line[ch] == '#') {          // preprocessor output
                var f = System.split (line, ' ')
                if (sizeof (f) >= 2) {
                    if (ctype.isdigit (f[1])) {
                        lineno = cast<int>(f[1])
                        if (sizeof (f) >= 3) {
                            var fn = f[2]
                            if (fn == "\"\"") {
                                filename = originalfile
                            } else {
                                filename = fn[1:sizeof (fn) -2]     // drop first and last character
                            }
                        } else {
                            filename = originalfile
                        }
                    }
                }
            }
            if (line[ch] == '/' && line[ch+1] == '/') {
                continue
            }
            break
        }
    }

    //
    // true once every line has been handed out by readLine
    //
    public function eof {
        return index == sizeof (lines)
    }

    // characters that have a meaning after a backslash
    var charmap = {'n'='\n', 'r'='\r', 't'='\t', 'a'='\a', 'b'='\b','v'='\v'}

    //
    // Scan the next token and store it in currentToken.  Depending on the
    // token, spelling, number and postfix are set too.  currentToken becomes
    // EOL when the input is exhausted and BAD when the text at ch is not
    // recognised (ch is then left on the offending character).
    //
    function nextToken {
        var tok = BAD
        spelling = ""

        // move to the next character that can start a token, reading lines as needed
        for (;;) {
            skipSpaces()
            if (ch >= (sizeof (line) - 1) || line[ch] == '#' ||
                (line[ch] == '/' && ch < (sizeof (line) - 1) && line[ch+1] == '/')) {
                if (eof()) {
                    currentToken = EOL
                    return
                }
                readLine()
            } else {
                break
            }
        }

        var c = line[ch++]
        if (c == '\"' || (c == 'L' && line[ch] == '"')) {
            // string literal, with an L prefix for LONGSTRING
            if (c == 'L') {
                ch++
                tok = LONGSTRING
            } else {
                tok = STRING
            }
            while (ch < sizeof (line) && line[ch] != '\"') {
                if (line[ch] == '\\') {
                    var char = charmap[line[ch+1]]
                    if (typeof char != "none") {
                        spelling += char
                    } else {
                        spelling += line[ch+1]      // unknown escape stands for itself
                    }
                    ch += 2
                } else {
                    spelling += line[ch++]
                }
            }
            if (ch != sizeof (line)) {
                ch++                                // closing quote
            }
        } elif (ctype.isalpha (c) || c == '_') {
            // identifier or reserved word
            spelling += c
            while (ch < sizeof (line) && (ctype.isalnum (line[ch]) || line[ch] == '_')) {
                spelling += line[ch++]
            }
            generic result = reservedWords[spelling]
            if (typeof (result) != "none") {
                tok = result
            } else {
                tok = IDENTIFIER
            }
        } elif (c == '\'') {
            // character constant; several characters are packed 8 bits each
            number = 0
            while (ch < sizeof (line) && line[ch] != '\'') {
                var num = 0
                if (line[ch] == '\\') {
                    var char = charmap[line[ch+1]]
                    if (typeof char != "none") {
                        num = char
                    } else {
                        num = line[ch+1]
                    }
                    ch += 2
                } else {
                    num = line[ch++]
                }
                number = (number << 8) | num
            }
            tok = CHAR
            if (ch < sizeof (line)) {
                ch++                                // closing quote, if there is one
            }
        } elif (ctype.isdigit (c)) {
            // integer or floating point number
            spelling += c
            var dot = false
            var exp = false
            var plus = false
            var hex = false
            if (c == '0' && ch < sizeof (line) && (line[ch] == 'x' || line[ch] == 'X')) {
                spelling += line[ch]
                hex = true
                ch++
            }
            // Look at each character before consuming it so that the one that
            // ends the number is never taken.
            while (ch < sizeof (line)) {
                c = line[ch]
                if (ctype.isspace (c)) {
                    break
                }
                if (c == '.') {
                    if (dot) {
                        break
                    }
                    dot = true
                } elif (!hex && (c == 'E' || c == 'e')) {
                    if (exp) {
                        break
                    }
                    exp = true
                } elif (exp && !plus && (c == '+' || c == '-')) {
                    // a sign belongs to the number only directly after the
                    // exponent marker; in "1e5-2" the '-' is an operator
                    var prev = spelling[sizeof (spelling) - 1]
                    if (prev != 'e' && prev != 'E') {
                        break
                    }
                    plus = true
                } elif (!ctype.isxdigit (c)) {
                    break
                }
                spelling += c
                ch++
            }
            var fp = dot | exp
            if (fp) {
                number = cast<real>(spelling)
                tok = FNUMBER
            } else {
                number = cast<int>(spelling)
                tok = NUMBER
            }
            // collect the u/U/l/L suffix letters in upper case
            postfix = ""
            var done = false
            while (ch < sizeof (line) && !done) {
                switch (line[ch]) {
                case 'u':
                case 'U':
                case 'l':
                case 'L':
                    postfix += ctype.toupper(line[ch])
                    ch++
                    break
                default:
                    done = true
                }
            }
        } else {
            // anything else must be one of the tokens given to addToken
            ch--
            foreach token tokens {
                if ((tok = token.find()) != BAD) {
                    break
                }
            }
        }
        currentToken = tok
    }

    //
    // if the current token is token, move to the next one and return true;
    // otherwise return false and stay where we are
    //
    public function match (token) {
        if (currentToken == token) {
            nextToken()
            return true
        }
        return false
    }

    //
    // return the spelling of the current IDENTIFIER and move on.
    // Throws "Identifier expected" for any other token.
    //
    public function getIdentifier {
        if (currentToken == IDENTIFIER) {
            var id = spelling
            nextToken()
            return id
        } else {
            throw "Identifier expected"
        }
    }

    //
    // return the spelling of the current STRING and move on.
    // Throws "String literal expected" for any other token.
    //
    public function getString {
        if (currentToken == STRING) {
            var id = spelling
            nextToken()
            return id
        } else {
            throw "String literal expected"
        }
    }

    //
    // return the value of the current NUMBER and move on.
    // Throws "Number expected" for any other token.
    //
    public function getNumber {
        if (currentToken == NUMBER) {
            var n = number
            nextToken()
            return n
        } else {
            throw "Number expected"
        }
    }

    //
    // skip white space (reading another line if this one is used up) and
    // return the character at ch without consuming it
    //
    public function lookaheadChar {
        skipSpaces()
        if (ch >= (sizeof (line) - 1)) {
            readLine()
        }
        return line[ch]
    }
}