Sophie: aikido-1.40-6mdv2010.0 i586

aikido-1.40-6mdv2010.0.i586.rpm

/*
 * clex.aikido
 *
 * Aikido Language System,
 * export version: 3.02
 * Copyright (c) 2002 Sun Microsystems, Inc.
 *
 * Sun Public License Notice
 * 
 * The contents of this file are subject to the Sun Public License Version 1.0 (the "License"). You
 * may not use this file except in compliance with the License. A copy of the License is available
 * at http://www.opensource.org/licenses/sunpublic.php
 * 
 * The Original Code is Aikido. 
 * The Initial Developer of the Original Code is David Allison on behalf of Sun Microsystems, Inc. 
 * Copyright (C) Sun Microsystems, Inc. 2000-2003. All Rights Reserved.
 * 
 * 
 * Contributor(s): dallison
 *
 * Version:  1.3
 * Created by dallison on 4/19/2002
 * Last modified by dallison on 03/07/29
 */

// lexical analyser

// Like lex.aikido, except that it also handles C language features such as
// multibyte character constants

// @(#)clex.aikido	1.3 03/07/29

import ctype

// list of tokens recognized
//
// These are the token kinds the lexer itself produces; reserved words and
// operator tokens are supplied by the caller through addReservedWord and
// addToken.
//   BAD        - nothing recognised (also the initial value of currentToken)
//   EOL        - set by nextToken when the end of a line is reached and no
//                further lines remain
//   NUMBER     - integer literal; value is left in 'number'
//   IDENTIFIER - identifier that is not a reserved word; text in 'spelling'
//   STRING     - "..." literal; decoded text in 'spelling'
//   CHAR       - '...' literal; packed value in 'number'
//   FNUMBER    - literal containing a '.' or an exponent; value in 'number'
//   LONGSTRING - L"..." literal; decoded text in 'spelling'
enum Tokens {
    BAD, EOL,
    NUMBER, IDENTIFIER, STRING, CHAR, FNUMBER, LONGSTRING
}

// parameter is vector of lines
class Lex (generic lines, originalfile) {
    // Accept either a vector of lines or a single value: anything that is
    // not already a vector is wrapped in a one-element vector.
    if (typeof (lines) != "vector") {
        lines = [lines]		// make into a vector
    }

    // must have a vector of strings
    // (any non-string element is rejected before lexing starts)
    foreach line lines {
       if (typeof (line) != "string") {
           throw "Only works with strings"
       }
    }

    // Scanner state.  nextToken leaves its results in currentToken,
    // spelling, number and postfix; readLine updates line, ch, lineno and
    // filename.
    public var line = ""				// current line
    public var currentToken = BAD			// current token
    public var spelling = ""				// current identifier or string spelling
    public generic number = 0				// current number or character

    private var reservedWords = {}			// empty map of reserved words
    private var tokens = []				// top level tokens
    public var ch = 0					// current position
    public var lineno = 0                               // display line number
    public var filename = originalfile                  // display file name
    private var index = 0                               // index into lines (real line no)
    public var postfix = ""                             // postfix for numbers

    //
    // Register a reserved word.
    //   word - spelling of the reserved word
    //   tok  - token value nextToken reports when an identifier-shaped
    //          lexeme has exactly this spelling
    // The pair is merged into the reservedWords map, which nextToken
    // consults after scanning an identifier.
    //
    public function addReservedWord (word, tok) {
        //println ("adding reserved word " + word + " as " + tok) ;
        reservedWords += {word = tok}
    }
    
    //
    // One node of the character tree used to recognise operator and
    // punctuation tokens registered through addToken.  Each node matches a
    // single character; a path from a root node to a node whose value is
    // not BAD spells a complete token.
    //   c   - character matched by this node
    //   val - token that ends at this node (BAD if none does)
    //   par - parent node (null for a root)
    //
    private class TokenNode (c, val, par) {
        var parent = par        // stored only; not read anywhere in this class
        var char = c
        var value = val

        var children = []       // nodes for the characters that may follow

    public:

        // 
        // add a child node to the tree
        //
        //   s     - full spelling of the token being added
        //   index - position in s that this node is expected to match
        //   tok   - token value to record where the spelling ends
        // Returns false when s[index] is not this node's character (the
        // spelling belongs in a different subtree) and true once the token
        // has been stored.  Throws "duplicate token" when the spelling has
        // already been registered.
        //
        function add (s, index, tok) {
            if (s[index] != char) {
                return false ;
            }
            if (index == sizeof (s) - 1) {
                if (value == BAD) {
                    value = tok
                    return true ;
                } else {
                    throw "duplicate token" 
                }
            }

            // more characters follow: hand the rest to a child that already
            // matches the next character, or create that child
            index++
            foreach child children {
                if (child.add (s, index, tok)) {
                    return true 
                }
            }

            var node = new TokenNode (s[index], BAD, this) ;
            children += node 
            return node.add (s, index, tok)
        }

        //
        // find the token whose first character is in line[ch].  Advance ch
        //
        // Returns the longest registered token starting at line[ch], leaving
        // ch just past it.  When no registered token ends at or below this
        // node, BAD is returned and ch is put back where it was on entry, so
        // a partially matched prefix (e.g. "<<" when only "<" and "<<=" are
        // registered) is not silently consumed.
        //
        function find {
            // never index past the end of the current line
            if (ch >= sizeof (line) || line[ch] != char) {
                return BAD
            }
            var start = ch
            ch++
            foreach child children {
                var tok = child.find()
                if (tok != BAD) {
                    return tok 
                }
            }
            if (value == BAD) {
                // nothing ends here: give this node's character back
                ch = start
            }
            return value 
        }
    }

public:

    //
    // Register the spelling s as token tok.
    // The spelling is offered to each existing root of the token tree in
    // turn; if none of them starts with s[0], a new root is created for it.
    //
    function addToken (s, tok) {
        //println ("adding token " + s + " as " + tok) 
        var placed = false
        foreach root tokens {
            if (root.add (s, 0, tok)) {
                placed = true
                break
            }
        }
        if (!placed) {
            // no tree starts with this character yet
            var fresh = new TokenNode (s[0], BAD, null)
            tokens += fresh
            fresh.add (s, 0, tok)
        }
    }

    //
    // Move ch forward over whitespace in the current line, stopping at the
    // first non-space character or at the end of the line.
    //
    function skipSpaces {
        for (;;) {
            if (ch >= sizeof (line)) {
                break
            }
            if (!ctype.isspace (line[ch])) {
                break
            }
            ch++
        }
    }

    //
    // Rewind the lexer to the first line of its input.
    // The displayed position (lineno and filename) is rewound together:
    // readLine changes filename when it sees a '#' line marker, so it is
    // restored to the name the lexer was constructed with.  The current
    // line text is left untouched; call readLine to load the first line.
    //
    public function reset {
        lineno = 0
        index = 0
        ch = 0
        filename = originalfile
    }

    //
    // read the next line
    //
    // Loads the next non-empty line into 'line', resets ch to 0 and bumps
    // lineno.  Does nothing once every line has been consumed.
    //
    //   skipblank - when true (the default) also skips lines that hold only
    //               whitespace and lines that start with a // comment, and
    //               leaves ch on the first non-space character.
    //
    // With skipblank set, a line whose first non-space character is '#' is
    // treated as a preprocessor line marker of the form
    //     # <number> "<file>"
    // and updates lineno and filename; the line itself is still returned
    // (nextToken skips it).
    //
    
    public function readLine (skipblank = true) {
         while (index < sizeof (lines)) {
            line = lines[index++]
            lineno++
            //println ("read line: " + line)
            if (sizeof (line) == 0) {
                continue
            }
            ch = 0
            if (!skipblank) {
                break
            }

            skipSpaces()
            // Nothing useful left on the line.  '>=' rather than '==' so the
            // case where skipSpaces ran right to the end of the line is caught
            // too, instead of indexing past the end below.
            if (ch >= sizeof (line) - 1) {
                continue
            }

            // from here on ch + 1 is always a valid index into line
            if (line[ch] == '#') {                // preprocessor output
                var f = System.split (line, ' ')
                if (sizeof (f) >= 2) {
                    if (ctype.isdigit (f[1])) {
                        lineno = cast<int>(f[1])
                        if (sizeof (f) >= 3) {
                           var fn = f[2]
                           if (fn == "\"\"") {
                               filename = originalfile 
                           } else {
                               // drop the first and last character (the quotes)
                               filename = fn[1:sizeof (fn) -2]
                           }
                        } else {
                           filename = originalfile 
                        }
                    }
                }
            }
            if (line[ch] == '/' && line[ch+1] == '/') {
                continue
            }
            break
        }
    }

    //
    // True once every input line has been handed out by readLine.
    //
    public function eof {
        var unread = sizeof (lines) - index
        return unread == 0
    }


    // Escape letters that may follow a backslash in a string or character
    // literal, mapped to the character they denote.  An escaped character
    // that is not listed here stands for itself.
    // NOTE(review): as a result "\0" and octal/hex escapes are not decoded
    // to their numeric value -- confirm whether callers need that.
    var charmap = {'n'='\n', 'r'='\r', 't'='\t', 'a'='\a', 'b'='\b','v'='\v'}

    //
    // Scan the next token and store it in currentToken.
    //
    // Side results, depending on the token:
    //   STRING / LONGSTRING      - spelling holds the decoded text
    //   IDENTIFIER / reserved    - spelling holds the word
    //   CHAR                     - number holds the characters packed 8 bits each
    //   NUMBER / FNUMBER         - spelling holds the digits, number the value,
    //                              postfix the upper-cased u/l suffix letters
    // Anything else is looked up in the token tree built by addToken; if
    // nothing matches, currentToken is BAD.  When the input is exhausted
    // currentToken is EOL.
    //
    function nextToken {
        var tok = BAD

        spelling = ""
        for (;;) {
            skipSpaces()
            // End of line, '#' or a // comment: move on to the next line.
            // '>=' (not '==') so that a position at or past the end of the
            // line is never indexed.
            if (ch >= (sizeof (line) - 1) || line[ch] == '#' || (line[ch] == '/' && ch < (sizeof (line) - 1) && line[ch+1] == '/')) {
                if (eof()) {
                    //System.println ("end of file")
                    currentToken = EOL
                    return
                }
                //System.println ("end of line, reading another")
                readLine()
            } else {
                break
            }
        }
        var c = line[ch++]
        //println ("c = " + c)
        if (c == '\"' || (c == 'L' && ch < sizeof (line) && line[ch] == '"')) {
            //println ("string")
            if (c == 'L') {
                ch++                    // step over the opening quote
                tok = LONGSTRING
            } else {
                tok = STRING
            }
            while (ch < sizeof (line) && line[ch] != '\"') {
                if (line[ch] == '\\' && ch + 1 < sizeof (line)) {
                    var mapped = charmap[line[ch+1]]
                    if (typeof mapped != "none") {
                        spelling += mapped
                    } else {
                        spelling += line[ch+1]
                    }
                    ch += 2
                } else {
                    spelling += line[ch++]
                }
            }
            // step over the closing quote unless the literal is unterminated
            if (ch < sizeof (line)) {
                ch++ 
            }
        } elif (ctype.isalpha (c) || c == '_') {
            //println ("identifier or resword")
            spelling += c
            while (ch < sizeof (line) && (ctype.isalnum (line[ch]) || line[ch] == '_')) {
                spelling += line[ch++]
            }
            generic result = reservedWords[spelling]
            if (typeof (result) != "none") {
                tok = result 
            } else {
                tok = IDENTIFIER
            }   
        } elif (c == '\'') {
            //println ("char")
            // multi-character constants are packed one character per byte
            number = 0
            while (ch < sizeof (line) && line[ch] != '\'') {
                var num = 0
                if (line[ch] == '\\' && ch + 1 < sizeof (line)) {
                    var mapped = charmap[line[ch+1]]
                    if (typeof mapped != "none") {
                        num = mapped
                    } else {
                        num = line[ch+1]
                    }
                    ch += 2
                } else {
                    num = line[ch++]
                }
                number = (number << 8) | num
            }
            tok = CHAR
            // step over the closing quote unless the literal is unterminated
            if (ch < sizeof (line)) {
                ch++
            }
        } elif (ctype.isdigit (c)) {
            //println ("found number")
            spelling += c
            var dot = false
            var exp = false
            var hex = false
            var afterExp = false        // previous character was the exponent marker
            if (c == '0' && ch < sizeof (line) && (line[ch] == 'x' || line[ch] == 'X')) {
                spelling += line[ch]
                hex = true
                ch++
            }
            // Look at each character before consuming it, so that ch always
            // ends up on the first character that is not part of the number,
            // including when the number runs to the very end of the line.
            while (ch < sizeof (line)) {
                c = line[ch]
                if (ctype.isspace (c)) {
                   break
                }
                var isExp = false
                if (c == '.') {
                   if (dot) {
                       break
                   }
                   dot = true
                } elif (!hex && (c == 'E' || c == 'e')) {
                   if (exp) {
                       break
                   }
                   exp = true
                   isExp = true
                } elif (c == '+' || c == '-') {
                   // a sign belongs to the number only directly after e/E
                   if (!afterExp) {
                       break
                   }
                } elif (!ctype.isxdigit(c)) {
                    break
                }
                spelling += c
                ch++
                afterExp = isExp
            }

            var fp = dot | exp
            if (fp) {
                number = cast<real>(spelling)
                tok = FNUMBER
            } else {
                number = cast<int>(spelling)
                tok = NUMBER
            }
            // collect any u/U/l/L suffix letters, upper-cased
            postfix = ""
            var done = false
            while (ch < sizeof (line) && !done) {
                switch (line[ch]) {
                case 'u':
                case 'U':
                case 'l':
                case 'L':
                    postfix += ctype.toupper(line[ch])
                    ch++
                    break
                default:
                    done = true
                }
            }
        } else {
            // operator or punctuation: back up and try each token tree
            ch--
            foreach token tokens {
                if ((tok = token.find()) != BAD) {
                    break
                }
            }
        }
        currentToken = tok
    }


    //
    // If the current token is 'token', consume it and return true;
    // otherwise leave the lexer where it is and return false.
    //
    public function match (token) {
        //println ("matching " + token + " with " + currentToken) 
        if (currentToken != token) {
            //println ("not matched")
            return false
        }
        //println ("matched!")
        nextToken()
        return true
    }    
    
    //
    // Consume an IDENTIFIER token and return its spelling.
    // Throws "Identifier expected" if the current token is anything else.
    //
    public function getIdentifier {
        if (currentToken != IDENTIFIER) {
            throw "Identifier expected"
        }
        // save the spelling first: nextToken overwrites it
        var name = spelling
        nextToken()
        return name
    }

    //
    // Consume a STRING token and return its decoded text.
    // Throws "String literal expected" if the current token is anything
    // else (a LONGSTRING token is not accepted here).
    //
    public function getString {
        if (currentToken != STRING) {
            throw "String literal expected"
        }
        // save the spelling first: nextToken overwrites it
        var text = spelling
        nextToken()
        return text
    }

    //
    // Consume a NUMBER token and return its value.
    // Throws "Number expected" if the current token is anything else
    // (an FNUMBER token is not accepted here).
    //
    public function getNumber {
        //println ("looking for number, currentToken is " + currentToken) 
        if (currentToken != NUMBER) {
            throw "Number expected"
        }
        // save the value first: nextToken may overwrite it
        var result = number
        nextToken()
        return result
    }

    //
    // Return the next non-space character without consuming it.
    // If only the end of the current line is left, the next line is read
    // first.  '>=' is used so that a position at or past the end of the
    // line also triggers the read instead of indexing beyond the line.
    // NOTE(review): when no lines remain, readLine changes nothing and the
    // index below may still be out of range -- callers should check eof().
    //
    public function lookaheadChar {
        skipSpaces()
        if (ch >= (sizeof (line) - 1)) {
            readLine()
        }
        return line[ch]
    }

    
}