Sophie

Sophie

distrib > Mandriva > 2010.0 > i586 > media > contrib-release > by-pkgid > 1e4be4f6cca2c9a2bfc532dbed99ff6a > files > 26

aikido-1.40-6mdv2010.0.i586.rpm

/*
 * cparse.aikido
 *
 * Aikido Language System,
 * export version: 1.00
 * Copyright (c) 2002-2003 Sun Microsystems, Inc.
 *
 * Sun Public License Notice
 * 
 * The contents of this file are subject to the Sun Public License Version 1.0 (the "License"). You
 * may not use this file except in compliance with the License. A copy of the License is available
 * at http://www.opensource.org/licenses/sunpublic.php
 * 
 * The Original Code is Aikido. 
 * The Initial Developer of the Original Code is David Allison on behalf of Sun Microsystems, Inc. 
 * Copyright (C) Sun Microsystems, Inc. 2000-2003. All Rights Reserved.
 * 
 * 
 * Contributor(s): dallison
 *
 * Version:  1.2
 * Created by dallison on 4/19/2002
 * Last modified by dallison on 03/07/29
 */

/**
 * Main C Parser program.  This defines the global objects and types
 */

native breakpoint(s)

import clex
import hashtable
import list
import stack

package CParser {

    // Token codes this parser adds to the lexer's Tokens enumeration.
    extend Tokens {
        // reserved words -- bound to their spellings by addReservedWord in CLex
        T_WHILE, T_IF, T_ELSE, T_DO, T_FOR,
        T_INT, T_CHAR, T_LONG, T_SHORT, T_FLOAT, T_DOUBLE,
        T_SIGNED, T_UNSIGNED, T_CONST, T_VOLATILE,
        T_STRUCT, T_UNION, T_ENUM, T_VOID,
        T_AUTO, T_STATIC, T_EXTERN, T_REGISTER, T_TYPEDEF,
        T_SIZEOF, T_BREAK, T_CONTINUE, T_RETURN,
        T_SWITCH, T_CASE, T_DEFAULT, T_GOTO, 

        // punctuation and operators -- bound to their spellings by addToken in CLex
        T_SEMICOLON, T_STAR, T_RPAREN, T_LPAREN, T_DOT,
        T_LSQUARE, T_RSQUARE, T_PLUS, T_MINUS, T_SLASH,
        T_PERCENT, T_QUESTION, T_COLON, T_COMMA, T_LBRACE,
        T_RBRACE, T_TILDE, T_CARET, T_BANG, T_AMPERSAND,
        T_BITOR, T_LOGAND, T_LOGOR, T_LSHIFT, T_RSHIFT,
        T_LESS, T_GREATER, T_LESSEQ, T_GREATEREQ, T_EQUAL, T_NOTEQ,
        T_PLUSPLUS, T_MINUSMINUS, T_ASSIGN,
        T_PLUSEQ, T_MINUSEQ, T_STAREQ, T_SLASHEQ, T_PERCENTEQ,
        T_LSHIFTEQ, T_RSHIFTEQ, T_ANDEQ,
        T_OREQ, T_XOREQ, T_ARROW, T_ELLIPSIS,

        // pseudo tokens: CLex registers no spelling for these; printToken
        // prints operator text for several of them
        T_POSTINC, T_POSTDEC, T_UMINUS, T_UPLUS, T_CONTENTS,
        T_ADDRESS, T_EXPR, T_NULL, T_COMPOUND, T_FIX, T_FLT,
        T_CAST, T_LABEL

    }

    /**
     * Lexer for C source: a Lex over the given lines/file with the C
     * reserved words and operator spellings registered on construction.
     * Registrations are made in the same order as before.
     */
    class CLex (lines, file) extends Lex (lines, file) {
        // statements
        addReservedWord("while", T_WHILE)
        addReservedWord("if", T_IF)
        addReservedWord("else", T_ELSE)
        addReservedWord("do", T_DO)
        addReservedWord("for", T_FOR)

        // arithmetic types and their modifiers
        addReservedWord("int", T_INT)
        addReservedWord("char", T_CHAR)
        addReservedWord("long", T_LONG)
        addReservedWord("short", T_SHORT)
        addReservedWord("float", T_FLOAT)
        addReservedWord("double", T_DOUBLE)
        addReservedWord("signed", T_SIGNED)
        addReservedWord("unsigned", T_UNSIGNED)
        addReservedWord("const", T_CONST)
        addReservedWord("volatile", T_VOLATILE)

        // aggregates
        addReservedWord("struct", T_STRUCT)
        addReservedWord("union", T_UNION)
        addReservedWord("enum", T_ENUM)

        // storage classes
        addReservedWord("auto", T_AUTO)
        addReservedWord("extern", T_EXTERN)
        addReservedWord("static", T_STATIC)
        addReservedWord("register", T_REGISTER)
        addReservedWord("typedef", T_TYPEDEF)

        // remaining keywords
        addReservedWord("sizeof", T_SIZEOF)
        addReservedWord("break", T_BREAK)
        addReservedWord("continue", T_CONTINUE)
        addReservedWord("return", T_RETURN)
        addReservedWord("switch", T_SWITCH)
        addReservedWord("case", T_CASE)
        addReservedWord("default", T_DEFAULT)
        addReservedWord("goto", T_GOTO)
        addReservedWord("void", T_VOID)

        // punctuation and single-character operators
        addToken("...", T_ELLIPSIS)
        addToken("*", T_STAR)
        addToken(")", T_RPAREN)
        addToken("(", T_LPAREN)
        addToken(".", T_DOT)
        addToken("[", T_LSQUARE)
        addToken("]", T_RSQUARE)
        addToken("+", T_PLUS)
        addToken("-", T_MINUS)
        addToken("/", T_SLASH)
        addToken("%", T_PERCENT)
        addToken("?", T_QUESTION)
        addToken(":", T_COLON)
        addToken(",", T_COMMA)
        addToken("{", T_LBRACE)
        addToken("}", T_RBRACE)
        addToken("~", T_TILDE)
        addToken("^", T_CARET)
        addToken("!", T_BANG)
        addToken("&", T_AMPERSAND)
        addToken("|", T_BITOR)

        // logical, shift and relational operators
        addToken("&&", T_LOGAND)
        addToken("||", T_LOGOR)
        addToken("<<", T_LSHIFT)
        addToken(">>", T_RSHIFT)
        addToken("<", T_LESS)
        addToken(">", T_GREATER)
        addToken("<=", T_LESSEQ)
        addToken(">=", T_GREATEREQ)
        addToken("==", T_EQUAL)
        addToken("!=", T_NOTEQ)

        // increment/decrement and assignment operators
        addToken("++", T_PLUSPLUS)
        addToken("--", T_MINUSMINUS)
        addToken("=", T_ASSIGN)
        addToken("+=", T_PLUSEQ)
        addToken("-=", T_MINUSEQ)
        addToken("*=", T_STAREQ)
        addToken("/=", T_SLASHEQ)
        addToken("%=", T_PERCENTEQ)
        addToken("<<=", T_LSHIFTEQ)
        addToken(">>=", T_RSHIFTEQ)
        addToken("&=", T_ANDEQ)
        addToken("|=", T_OREQ)
        addToken("^=", T_XOREQ)

        // statement terminator and member access through a pointer
        addToken(";", T_SEMICOLON)
        addToken("->", T_ARROW)

    }

    /**
     * Send the textual form of an operator token to s with the -> operator.
     *
     * tok: a token code from the Tokens enumeration
     * s:   destination of the text (NOTE(review): presumably an output stream -- confirm against callers)
     *
     * Token codes without a case below (there is no default) produce no output.
     */
    function printToken (tok, s) {
        switch (tok) {
        // operators that the lexer reads from the source text
        case T_STAR:
            '*' -> s
            break
        case T_DOT:
            '.' -> s
            break
        case T_PLUS:
            '+' -> s
            break
        case T_MINUS:
            '-' -> s
            break
        case T_SLASH:
            '/' -> s
            break
        case T_PERCENT:
            '%' -> s
            break
        case T_COMMA:
            ',' -> s
            break
        case T_TILDE:
            '~' -> s
            break
        case T_CARET:
            '^' -> s
            break
        case T_BANG:
            '!' -> s
            break
        case T_AMPERSAND:
            '&' -> s
            break
        case T_BITOR:
            '|' -> s
            break
        case T_LOGAND:
            "&&" -> s
            break
        case T_LOGOR:
            "||" -> s
            break
        case T_LSHIFT:
            "<<" -> s
            break
        case T_RSHIFT:
            ">>" -> s
            break
        case T_LESS:
            '<' -> s
            break
        case T_GREATER:
            '>' -> s
            break
        case T_LESSEQ:
            "<=" -> s
            break
        case T_GREATEREQ:
            ">=" -> s
            break
        case T_EQUAL:
            "==" -> s
            break
        case T_NOTEQ:
            "!=" -> s
            break
        case T_PLUSPLUS:
            "++" -> s
            break
        case T_MINUSMINUS:
            "--" -> s
            break
        case T_ASSIGN:
            '=' -> s
            break
        case T_PLUSEQ:
            "+=" -> s
            break
        case T_MINUSEQ:
            "-=" -> s
            break
        case T_STAREQ:
            "*=" -> s
            break
        case T_SLASHEQ:
            "/=" -> s
            break
        case T_PERCENTEQ:
            "%=" -> s
            break
        case T_LSHIFTEQ:
            "<<=" -> s
            break
        case T_RSHIFTEQ:
            ">>=" -> s
            break
        case T_ANDEQ:
            "&=" -> s
            break
        case T_OREQ:
            "|=" -> s
            break
        case T_XOREQ:
            "^=" -> s
            break
        case T_ARROW:
            "->" -> s
            break
        // pseudo tokens: printed with the same text as T_PLUSPLUS, T_MINUSMINUS,
        // T_MINUS, T_PLUS, T_STAR and T_AMPERSAND respectively
        case T_POSTINC:
            "++" -> s
            break
        case T_POSTDEC:
            "--" -> s
            break
        case T_UMINUS:
            "-" -> s
            break
        case T_UPLUS:
            "+" -> s
            break
        case T_CONTENTS:
            '*' -> s
            break
        case T_ADDRESS:
            '&' -> s
            break
        // pseudo tokens printed as a space-padded marker word
        case T_FIX:
            " FIX " -> s
            break
        case T_FLT:
            " FLT " -> s
            break
        }
    }

    public class Parser (cppopts) {
        // Preprocessor command line: "gcc -E " followed by every element of
        // cppopts, each preceded by a space.
        var cpp = "gcc -E "             // use C compiler as the preprocessor
        foreach c cppopts {
            cpp += " " + c
        }
        // name of the file that receives the preprocessor output in parse()
        var tmpfile = tmpnam()

        // lexer for the file being parsed; assigned by parse()
        var lex = null

        // errors reported so far; error() exits once this exceeds maxErrors
        var numErrors = 0
        const maxErrors = 50

       
        // NOTE(review): not read or written anywhere in the code visible here
        var flags = 0

        /**
         * Report an error at the lexer's current file and line on stderr and
         * count it.  Once more than maxErrors errors have been reported the
         * program prints a final message and exits with status 1.
         */
        function error (s) {
            var where = "\"" + lex.filename + "\", line " + lex.lineno + ": "
            println (where + s, stderr)
            if (++numErrors > maxErrors) {
                println ("Too many errors, goodbye", stderr)
                exit (1)
            }
        }

        /**
         * Report a warning at the lexer's current file and line on stderr.
         * Warnings are not counted towards the error limit.
         */
        function warning (s) {
            var prefix = "\"" + lex.filename + "\", line " + lex.lineno + ": warning: "
            println (prefix + s, stderr)
        }

       
        // Something that can print itself to s; indent is passed along by
        // callers (NOTE(review): presumably an indentation level -- confirm).
        interface Printable {
            function print (s, indent)
        }

        // Something that can write a dump record of itself to a stream.
        interface Dumpable {
            function dump (stream)
        }

        /**
         * Base class of the nodes the parser builds.
         *
         * NOTE(review): the class also defines dump() but declares only
         * Printable, not Dumpable -- confirm whether that is intended.
         */
        class ParseNode implements Printable {
            // number of nodes dumped so far, shared by all nodes
            static var nodecount = 0

            // 0 until the node has been dumped, then its dump sequence number
            public var id = 0

            // default print does nothing
            public function print (s, indent) {
            }

            // Give the node the next sequence number and start its dump record:
            // a newline, the id and the result of typeof (this), in fixed-width
            // columns.  Throws if the node has already been dumped.
            public function dump (stream) {
                if (id != 0) {
                    throw "Attempt to dump node " + id + " multiple times"
                }
                id = ++nodecount
                format ("\n%-8d %-25s", id, typeof (this)) -> stream
                //['\n', id, '\t', typeof (this), '\t'] -> stream
            }

        }

        // C type attribute flags, one bit each
        const CINT      = 0x0001
        const CCHAR     = 0x0002
        const CFLOAT    = 0x0004
        const CDOUBLE   = 0x0008
        const CVOID     = 0x0010
        const CSIGNED   = 0x0020
        const CUNSIGNED = 0x0040
        const CLONG     = 0x0080
        const CSHORT    = 0x0100
        const CSTRUCT   = 0x0200
        const CUNION    = 0x0400
        const CENUM     = 0x0800
        const CCONST    = 0x1000
        const CVOLATILE = 0x2000
        const CBITFIELD = 0x4000
        const CLONGLONG = 0x8000

        // storage classes, one bit each (the values 0x0004 and 0x0008 are not assigned)
        const sSTATIC = 0x0001
        const sAUTO = 0x0002
        const sEXTERN = 0x0010
        const sTYPEDEF = 0x0020
        const sREGISTER = 0x0040
        const sARGUMENT = 0x0080
        const sENUMCONST = 0x0100
        const sSTRTAG = 0x0200

        // Return a fresh placeholder name: "__fake__" followed by a counter
        // that starts at 0 and goes up by one on every call.
        function fakename {
            static var fnnum = 0
            var name = "__fake__" + fnnum
            fnnum++
            return name
        }

        class Symbol...

        /**
         * Details attached to a function's symbol: its body and the symbols
         * it depends on, with a check for recursive dependency paths.
         */
        class Function (symbol) {
            var dependencies = {}               // symbol name -> symbol
            var body = null                     // set by setBody()
            var busy = false                    // true while checkDependencies() is inside this function

        public:
            function getSymbol {
                return symbol
            }

            function setBody (b) {
                body = b
            }

            function getBody() {
                return body
            }

            // Record sym as a dependency; the first symbol stored under a
            // name is kept.
            function addDependency (sym) {
                var key = sym.getName()
                if (!(key in dependencies)) {
                    dependencies[key] = sym
                }
            }

            // List the names of all recorded dependencies on standard output.
            function showDependencies {
                var header = "function " + symbol.getName() + " dependencies:"
                println (header)
                foreach entry dependencies {
                    println ("    " + entry.first)
                }
            }

            // Walk the dependencies, following the details of each dependent
            // symbol, and report an error if the walk arrives back at a
            // function that is still being walked.  parent is the path
            // walked so far.
            function checkDependencies (parent) {
                if (!busy) {
                    busy = true
                    var path = parent + "." + symbol.getName()
                    foreach entry dependencies {
                        var callee = entry.second.getDetails()
                        if (callee != null) {
                            callee.checkDependencies (path)
                        }
                    }
                    busy = false
                } else {
                    // NOTE(review): (parent << 1) presumably drops the leading '.' of the path -- confirm
                    error ("Recursive path to " + symbol.getName() + " from " + (parent << 1))
                }
            }

            // Print the body, if one has been set.
            function print (s, indent) {
                if (body != null) {
                    body.print (s, indent)
                }
            }
        }

        // the Function whose body is being parsed; set by parse() before it calls functionBody
        var currentFunction = null

        /**
         * A named entity known to the parser.
         *
         * name: the symbol's name
         * stor: initial storage class (one of the s... constants), default 0
         */
        class Symbol (name, stor = 0) extends ParseNode {
            var type = null         // type of symbol
            var predefined = false  // symbol is predefined
            var offset = 0          // offset into things
            var storage = stor
            generic details = null         // function body, initial value or enum value

            public function setType (t) {
                type = t
            }

            public function setStorage (s) {
                storage = s
            }

            public function getStorage() {
                return storage
            }

            public function getName {
                return name
            }

            public function getType {
                return type
            }

            // mark the symbol as declared but not yet defined
            public function predefine {
                predefined = true
            }
     
            public function isPredefined {
                return predefined
            }

            // clear the predefined mark
            public function define {
                predefined = false
            }

            // size of the symbol as reported by its type; a type must have been set
            public function getSize() {
                return type.getSize()
            }

            public function getOffset() {
                return offset
            }

            public function setOffset (off) {
                offset = off
            }

            public function getDetails() {
                return details
            }

            public function setDetails (d) {
                details = d
            }

            // Write the symbol's dump record to stream.  The type is dumped
            // first if it has not been dumped yet, so that its id is known;
            // the record then holds the name, the type's id, the predefined
            // flag, the offset and the storage class.
            public function dump (stream) {
                // type stays null until setType is called (SymbolTable.insert
                // allows for that), so do not dereference it blindly; a
                // symbol without a type is dumped with type id 0
                var typeId = 0
                if (type != null) {
                    if (type.id == 0) {
                        type.dump (stream)
                    }
                    typeId = type.id
                }
                ParseNode.dump (stream)
                [name, ' ', typeId, ' ', predefined, ' ', offset, ' ', storage] -> stream
            }
        }

        // complete registry of all symbols: SymbolTable.insert adds every
        // newly inserted symbol that has a type to one of these two vectors
        var variables = []
        var functions = []

        // append a symbol to the list of functions
        function addFunction (sym) {
            append (functions, sym)
        }

        // append a symbol to the list of variables
        function addVariable (sym) {
            append (variables, sym)
        }

        /**
         * One scope's worth of symbols: a Hashtable keyed by symbol name.
         */
        class SymbolTable (size) extends Hashtable (size) {
            // Insert a symbol and return the symbol that now represents the
            // name in this table: the new symbol if the name was unused,
            // otherwise the one already present.
            public function insert (symbol) {
                var name = symbol.getName()
                var existing = get (name)

                if (typeof (existing) == "none") {
                    // new name: store it and, when it has a type, register it
                    // as a function or a variable
                    put (name, symbol)
                    var t = symbol.getType()
                    if (t != null) {
                        if (t.isFunction()) {
                            addFunction (symbol)
                        } else {
                            addVariable (symbol)
                        }
                    }
                    return symbol
                }

                if (existing.isPredefined()) {
                    // an earlier predefinition is completed by this one
                    existing.define()
                    existing.setType (symbol.getType())
                    return existing
                }

                if (existing.getStorage() == sEXTERN || symbol.getStorage() == sEXTERN) {
                    // an extern on either side may be repeated; the newer type is kept
                    existing.setType (symbol.getType())
                    return existing
                }

                error ("Duplicate definition of symbol " + name)
                return existing
            }

            // Look a name up in this table; null if it is not present.
            public function find (name) {
                var sym = get (name)
                return typeof (sym) != "none" ? sym : null
            }
        }

        /**
         * Stack of SymbolTables held in a List; the last item is the current
         * scope.
         *
         * topsize: hash size of the first table pushed
         * size:    hash size of every later table
         */
        class SymbolStack (topsize, size) extends List {
            // open a new scope
            public function push {
                var sz = sizeof(this) == 0 ? topsize : size
                var table = new SymbolTable (sz)
                insertEnd (table)
            }

            // close the current scope
            public function pop {
                erase (lastitem)
            }

            // look a name up in the current scope only; null if not found
            // or if no scope is open
            public function findTop (name) {
                if (lastitem == null) {
                    return null
                }
                return lastitem.value.find (name)
            }

            // look a name up starting at the current scope and working back
            // through the earlier ones; null if no scope has it
            public function find (name) {
                var item = lastitem
                while (item != null) {
                   var sym = item.value.find (name)
                   if (sym != null) {
                       return sym
                   }
                   item = item.prev
                }
                return null
            }

            // insert into the current scope; returns the symbol that the
            // table holds for the name
            public function insert (sym) {
                return lastitem.value.insert (sym)
            }

            // insert into the table at the head of the list (NOTE(review):
            // presumably the first scope pushed -- confirm List.items); like
            // insert, returns the symbol the table holds for the name, which
            // may be an existing symbol rather than sym
            public function insertBottom (sym) {
                return items.value.insert (sym)
            }

            public operator sizeof {
                return List.operator sizeof()
            }
        }

        // scope stacks for ordinary symbols and for tags; the arguments are
        // the hash sizes for the first table and for every later table
        var symbolStack = new SymbolStack(1009, 101)
        var tagStack = new SymbolStack(101, 11)

        // global symbol and tag tables
        symbolStack.push()
        tagStack.push()

        // look a symbol up in every open scope; null if not found
        function findSymbol (name) {
            return symbolStack.find (name)
        }

        // look a symbol up in the current scope only; null if not found
        function findTopSymbol (name) {
            return symbolStack.findTop (name)
        }

        // insert a symbol into the current scope; returns the symbol the
        // scope holds for that name
        function insertSymbol (sym) {
            return symbolStack.insert (sym)
        }

        

        // insert a symbol at the bottom of the stack
        function insertGlobalSymbol (tag) {
            symbolStack.insertBottom (tag)
        }

        
        // look a tag up in every open scope; null if not found
        function findTag (name) {
            return tagStack.find (name)
        }

        // look a tag up in the current scope only; null if not found
        function findTopTag (name) {
            return tagStack.findTop (name)
        }

        // Insert a tag into the current tag scope.  Returns what the tag
        // stack's insert returns, as insertSymbol does for symbols, so the
        // caller can tell which symbol the scope holds for the name.
        function insertTag (tag) {
            return tagStack.insert (tag)
        }

        // open a new scope on both the symbol stack and the tag stack
        function pushScope {
            symbolStack.push()
            tagStack.push()
        }

        // close the current scope on both the symbol stack and the tag stack
        function popScope {
            symbolStack.pop()
            tagStack.pop()
        }

        function functionBody ...
        function storage...
        function type...
        function declaration...
        function needsemi...
        function staticInitializer...
        function implicitType...

        // Character used in messages for a bracket token; any token without
        // its own case is reported as '('.
        function bracketSpelling (brack) {
            switch (brack) {
            case T_RPAREN:
                return ')'
            case T_LBRACE:
                return '{'
            case T_RBRACE:
                return '}'
            case T_LSQUARE:
                return '['
            case T_RSQUARE:
                return ']'
            }
            return '('
        }

        // Ask the lexer to match the bracket token brack and report
        // "'<bracket>' expected" as an error if it does not.
        function needbrack (brack) {
            if (!lex.match (brack)) {
                error ("'" + bracketSpelling (brack) + "' expected")
            }
        }

        /**
         * Preprocess one C source file and parse its top-level declarations.
         *
         * The file is run through the preprocessor command into the
         * temporary file, which is read back, handed to a new CLex and
         * removed again at the end.  Each declared symbol is inserted into
         * the current scope; a function followed by '{' has its body parsed
         * and attached as a Function, a function without a body is marked
         * predefined, and a variable followed by '=' has its initializer
         * attached.
         *
         * file: path of the C source file
         *
         * NOTE(review): file is placed in the shell command unquoted, so a
         * path containing spaces or shell metacharacters will not work.
         */
        public function parse (file) {
            system (cpp + " " + file + " > " + tmpfile)
            
            var lines = readfile (tmpfile)
            lex = new CLex (lines, file)

            lex.readLine()
            lex.nextToken()
            while (!lex.eof()) {
                var s = storage()
                var done = false
                var typ = type()
                if (typ == null) {
                    typ = implicitType()
                }
                // one declarator per iteration; they are separated by commas
                while (!done) {
                    var decl = declaration (s, typ)
                    if (decl != null) {
                        var t = decl.getType()
                        // insertSymbol may hand back an earlier symbol of the same name
                        decl = insertSymbol (decl)
                        if (t.isFunction()) {
                            if (lex.match (T_LBRACE)) {
                                var func = new Function (decl)
                                currentFunction = func
                                var body = functionBody(decl)
                                func.setBody(body)
                                decl.setDetails (func)
                                println (decl.getName())
                                needbrack (T_RBRACE)
                                done = true             // a function definition takes no ';'
                            } else {
                                decl.predefine()
                            }

                        } elif (lex.match (T_ASSIGN)) {             // initializer
                            var initval = staticInitializer (t)
                            decl.setDetails (initval)
                        }
                        if (!lex.match (T_COMMA)) {
                            break
                        }
                    } else {
                        error ("Declaration expected")
                        if (lex.eof()) {
                            // nothing is left to skip over: stop here instead
                            // of repeating the error until the error limit
                            // ends the program
                            done = true
                            break
                        }
                        lex.nextToken()
                    }
                }
                if (!done) {
                    needsemi()
                }
            }
            remove (tmpfile)
        }
    }
}

// command line arguments, sorted by the loop below:
// -I/-D/-U options to hand to the preprocessor, and input files
var cppopts = []
var files = []

// Print the usage line on stderr and exit with status 1.
function usage() {
    println ("usage: cparse [-Idir] [-Dmacro[=value]] [-Umacro] file ...", stderr)
    exit (1)
}


// Sort the command line: -I, -D and -U options are collected for the
// preprocessor, any other option is rejected with the usage message, and
// everything else is taken as an input file.  An empty argument or a bare
// "-" is rejected too; the length is checked before any character is read
// so that neither is indexed past its end.
for (var i = 0 ; i < sizeof (args) ; i++) {
    var arg = args[i]
    if (sizeof (arg) == 0) {
        usage()
    } elif (arg[0] == '-') {
        if (sizeof (arg) > 1 && (arg[1] == 'I' || arg[1] == 'D' || arg[1] == 'U')) {
            append (cppopts, arg)
        } else {
            switch (arg) {
                // other options here
            default:
                usage()
            }
        }
    } else {
         append (files, arg)
    }
}

// at least one input file is required
if (sizeof (files) == 0) {
    usage()
}

// a single parser, given the collected preprocessor options, handles every file
var parser = new CParser.Parser (cppopts)
// NOTE(review): declarevarargs and printFunctions are not defined in the
// part of Parser visible in this file -- confirm where they come from
parser.declarevarargs() ;

foreach file files {
    parser.parse(file)
    parser.printFunctions()
}