23 Nov 2012, 20:07
Generic-user-small

George S. Cowan (1 post)

I take it all back, almost. The only annotation that seems to be missing is the annotation for package.


Original Post - - - - - The tour/Java.g4 grammar only allows annotations for an enumConstant.

This raises a slightly larger issue: is there a need for a simple Java grammar that does not catch every syntax error but that does allow every legal Java program to parse?

Such a grammar would be useful as a starting point for tools that process Java programs that have already been compiled or will be compiled later. But stricter Java grammars already exist, e.g., JavaLR.g4 at github.com/antlr/grammars-v4. Is there a need for a simpler Java grammar?

23 Nov 2012, 21:02
Generic-user-small

Bernard Kaiflin (8 posts)

Answer to the line “The tour/Java.g4 grammar only allows annotations for an enumConstant.” of the Original Post. I have used this grammar to extract signatures, with four slight modifications :

compilationUnit
@init {System.out.println("----- grammar Java last update 1359");}

to be sure to work with the latest version.

// A class declaration, split into a header rule (classSignature) followed by
// the class body, so that a listener can handle the header on its own.
classDeclaration
    :   classSignature classBody
    ;

// The declaration line of a class: the 'class' keyword, the class name,
// optional type parameters, an optional 'extends' clause and an optional
// 'implements' list. Modifiers are not part of this rule.
classSignature
    :   'class' Identifier typeParameters? ('extends' type)?
        ('implements' typeList)?
    ;

to be able to print only the declaration line, idem for methods and constructors :

// A method declaration, split into its signature and the remainder
// (methodDeclarationRest), so that the signature gets its own listener events.
methodDeclaration
    :   methodSignature methodDeclarationRest
    ;

// The signature of a method: either a return type, name, formal parameters and
// optional trailing array brackets, or a 'void' method with name and formal
// parameters. No 'throws' clause is matched by this rule.
methodSignature
    :   type Identifier formalParameters ('[' ']')*
    |   'void' Identifier formalParameters
    ;
// A constructor declaration, split into its signature and its body so that the
// signature gets its own listener events.
constructorDeclaration
    :   constructorSignature constructorBody
    ;

// The signature of a constructor: optional type parameters, the constructor
// name, its formal parameters and an optional 'throws' clause.
constructorSignature
    :   typeParameters? Identifier formalParameters
        ('throws' qualifiedNameList)?
    ;

The listener captures these three events and prints the signatures. (Note : translated from Ruby. As I don’t know Java, it may look ugly.)

===== file MyJavaListener.java =====
 
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import java.util.List;

/**
 * Parse-tree listener that prints the declaration line of every class,
 * constructor and method it encounters, indented by class nesting depth.
 *
 * <p>The text of each line is taken from the token stream, starting at the
 * first token of the enclosing declaration (so that modifiers are included)
 * and ending at the last token of the signature rule.
 */
public class MyJavaListener extends JavaBaseListener {
    /** Number of columns added per class nesting level. */
    private static final int INDENT_WIDTH = 4;

    /** Indentation used for the next class signature line. */
    String            current_indent = "";
    /** Unused; kept so the set of fields of this class is unchanged. */
    String            indent;
    /** Current class nesting depth (0 outside of any class declaration). */
    int               logical_level  = 0;
    /** Unused; the handlers now keep their tree navigation in local variables. */
    ParserRuleContext parent;
    /** Pre-built padding used by {@link #indent()} for shallow nesting depths. */
    String            spaces = "                                        ";
    /** Token stream the parse tree was built from; source of the printed text. */
    CommonTokenStream tokens;

    /**
     * Creates the listener.
     *
     * @param p_tokens token stream that was fed to the parser
     */
    public MyJavaListener(CommonTokenStream p_tokens) {
        System.out.println("--- MyJavaListener 1409");
        tokens = p_tokens;
    }

    /** Entering a class: one nesting level deeper. */
    public void enterClassDeclaration(JavaParser.ClassDeclarationContext ctx) {
        logical_level += 1;
    }

    /**
     * Leaving a class: one nesting level up. The class indentation is reset as
     * well, otherwise a following sibling class would be printed with the
     * indentation of the members of the class that was just left.
     */
    public void exitClassDeclaration(JavaParser.ClassDeclarationContext ctx) {
        logical_level -= 1;
        current_indent = indent();
    }

    /**
     * Prints the declaration line of a class, including its modifiers.
     *
     * @param ctx the class signature node
     */
    public void enterClassSignature(JavaParser.ClassSignatureContext ctx) {
        ParserRuleContext cdecl = parent(ctx);    // classDeclaration
        ParserRuleContext owner = parent(cdecl);  // typeDeclaration or member
        String ownerRule = ruleName(owner);
        Token start;

        // Rule names are compared with equals(): identity comparison of
        // strings only works by accident of interning.
        if ("typeDeclaration".equals(ownerRule)) {
            List<JavaParser.ClassOrInterfaceModifierContext> mods =
                ((JavaParser.TypeDeclarationContext) owner).classOrInterfaceModifier();
            // Start at the first modifier if there is one, else at the declaration.
            start = mods.isEmpty() ? cdecl.getStart() : mods.get(0).getStart();
        } else if ("member".equals(ownerRule)) {
            // Nested class: the enclosing classBodyDeclaration starts at the modifiers.
            start = parent(owner).getStart();
        } else {
            start = ctx.getStart();
        }

        Token stop = ctx.getStop();
        System.out.println(current_indent + tokens.getText(start, stop));
        current_indent = indent();
    }

    /** Prints the declaration line of a constructor. */
    public void exitConstructorSignature(JavaParser.ConstructorSignatureContext ctx) {
        printMemberSignature(ctx);
    }

    /** Prints the declaration line of a method. */
    public void exitMethodSignature(JavaParser.MethodSignatureContext ctx) {
        printMemberSignature(ctx);
    }

    /**
     * Returns the indentation for the current nesting depth.
     *
     * @return {@code logical_level * 4} spaces, or the empty string when the
     *         depth is zero or negative
     */
    public String indent() {
        int width = logical_level * INDENT_WIDTH;
        if (width <= 0) {
            return "";
        }
        if (width <= spaces.length()) {
            return spaces.substring(0, width);
        }
        // Deeper than the pre-built padding: build the string instead of failing.
        StringBuilder pad = new StringBuilder(width);
        for (int i = 0; i < width; i++) {
            pad.append(' ');
        }
        return pad.toString();
    }

    /**
     * Returns the parent node of the given context.
     *
     * @param p_ctx a parse-tree node
     * @return its parent as reported by {@code getParent()}
     */
    public ParserRuleContext parent(ParserRuleContext p_ctx) {
        return p_ctx.getParent();
    }

    /** Returns the grammar rule name of a node, or {@code null} for a null node. */
    private static String ruleName(ParserRuleContext p_ctx) {
        return (p_ctx == null) ? null : JavaParser.ruleNames[p_ctx.getRuleIndex()];
    }

    /**
     * Prints a constructor or method signature, from the first token of the
     * enclosing classBodyDeclaration to the last token of the signature.
     */
    private void printMemberSignature(ParserRuleContext signature) {
        ParserRuleContext declaration = parent(signature);   // constructor-/methodDeclaration
        ParserRuleContext member      = parent(declaration); // member
        ParserRuleContext bodyDecl    = parent(member);      // classBodyDeclaration
        Token start = bodyDecl.getStart();
        Token stop  = signature.getStop();
        System.out.println(indent() + tokens.getText(start, stop));
    }
}

Test program :

===== file TestParseJava.java =====

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;

import java.io.FileInputStream;
import java.io.InputStream;

/**
 * Command-line driver: parses a Java source (the file named by the first
 * argument, or standard input when no argument is given) and walks the
 * resulting tree with a {@code MyJavaListener}.
 */
public class TestParseJava {
    /**
     * @param args optional: path of the Java file to parse
     * @throws Exception if the input cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        final String input_file = (args.length > 0) ? args[0] : null;

        // Fall back to standard input when no file name was supplied.
        final InputStream source =
            (input_file == null) ? System.in : new FileInputStream(input_file);

        System.out.println("TestParseJava.java 1052, input_file=" + input_file);

        // Characters -> tokens -> parse tree.
        final ANTLRInputStream chars = new ANTLRInputStream(source);
        final JavaLexer javaLexer = new JavaLexer(chars);
        final CommonTokenStream tokenStream = new CommonTokenStream(javaLexer);
        final JavaParser javaParser = new JavaParser(tokenStream);
        final ParseTree root = javaParser.compilationUnit();
        System.out.println("<<< parsing finished");

        // The listener needs the token stream to recover the source text.
        final MyJavaListener signaturePrinter = new MyJavaListener(tokenStream);

        // Walk the finished tree; the walker fires the listener callbacks.
        new ParseTreeWalker().walk(signaturePrinter, root);
    }
}

Some test input :

===== file tsig.java =====

public class BisonCParser extends Parser {
	public static class Y_parser_ruleContext extends ParserRuleContext {
		public TerminalNode ID() { return getToken(BisonCParser.ID, 0); }
		public Y_parser_ruleContext(ParserRuleContext parent, int state) {
			super(parent, state);
		}
		@Override public int getRuleIndex() { return RULE_y_parser_rule; }
		@Override
        public void exitRule(ParseTreeListener listener) {
			if ( listener instanceof BisonCListener ) ((BisonCListener)listener).exitY_parser_rule(this);
		}
	}

	public final Y_parser_ruleContext y_parser_rule() throws RecognitionException {
		Y_parser_ruleContext _localctx = new Y_parser_ruleContext(_ctx, 18);
		enterRule(_localctx, RULE_y_parser_rule);
		int _la;
		return _localctx;
	}
}

Execution :

$ echo $CLASSPATH
.:/usr/local/lib/antlr-4.0b3-complete.jar
$ alias
alias antlr4='java -jar /usr/local/lib/antlr-4.0b3-complete.jar'
$ antlr4 Java.g4 
$ javac Java*.java
$ javac TestParseJava.java 
$ java TestParseJava tsig.java 
TestParseJava.java 1052, input_file=tsig.java
----- grammar Java last update 1359
<<< parsing finished
--- MyJavaListener 1409
public class BisonCParser extends Parser
    public static class Y_parser_ruleContext extends ParserRuleContext
        public TerminalNode ID()
        public Y_parser_ruleContext(ParserRuleContext parent, int state)
        @Override public int getRuleIndex()
        @Override
        public void exitRule(ParseTreeListener listener)
    public final Y_parser_ruleContext y_parser_rule()
23 Nov 2012, 20:59
Generic-user-small

Bernard Kaiflin (8 posts)

For the curious, this is the JRuby equivalent. First I write a Ruby listener, then, based on a small Ruby grammar, a tool extracts the enter/exit methods and automatically generates the corresponding Java listener. This is still in an experimental state.

===== file javaListener.rb =====

    # JRuby listener for grammar Java.g4
# Prints the declaration line of every class, constructor and method reported
# by the parse-tree walker, indented by class nesting depth.
#
# The lexer, token stream and parser must be supplied through #setSource
# before the tree is walked.
class JavaListener
    def initialize
        puts "in JRuby JavaListener 1427"
        @logical_level  = 0
        @current_indent = indent
    end

    # Entering a class: one nesting level deeper.
    # The context argument is optional because the Java bridge calls this
    # method without arguments.
    def enterClassDeclaration(p_ctx = nil)
        @logical_level += 1
    end

    # Leaving a class: one nesting level up. The class indentation is reset
    # too, otherwise a following sibling class would be printed with the
    # indentation of the members of the class that was just left.
    def exitClassDeclaration(p_ctx)
        @logical_level -= 1
        @current_indent = indent
    end

    # Prints the declaration line of a class, including its modifiers.
    def enterClassSignature(p_ctx)
        cdecl = parent(p_ctx)  # classDeclaration
        owner = parent(cdecl)  # typeDeclaration or member

        start =
            case rule_name(owner)
            when "typeDeclaration"
                mods = owner.classOrInterfaceModifier
                # Start at the first modifier if there is one.
                mods.size > 0 ? mods[0].getStart : cdecl.getStart
            when "member"
                # Nested class: the classBodyDeclaration starts at the modifiers.
                parent(owner).getStart
            else
                # Any other owner: fall back to the signature itself instead
                # of passing nil to the token stream.
                p_ctx.getStart
            end

        stop = p_ctx.getStop

        puts "#{@current_indent}#{@tokens.getText(start, stop)}"
        @current_indent = indent
    end

    # Prints the declaration line of a constructor.
    def exitConstructorSignature(p_ctx)
        print_member_signature(p_ctx)
    end

    # Prints the declaration line of a method.
    def exitMethodSignature(p_ctx)
        print_member_signature(p_ctx)
    end

    # Indentation for the current nesting depth (4 spaces per level).
    def indent
        " " * @logical_level * 4
    end

    # Parent node of the given parse-tree node.
    def parent(p_ctx)
        p_ctx.getParent
    end

    # Stores the lexer, token stream and parser used by the callbacks.
    def setSource(p_lexer, p_tokens, p_parser)
        puts "JavaListener#setSource, tokens size=#{p_tokens.size}"
        @lexer  = p_lexer
        @parser = p_parser
        @tokens = p_tokens
    end

    private

    # Grammar rule name of a node, or nil for a nil node.
    def rule_name(p_ctx)
        p_ctx.nil? ? nil : @parser.ruleNames[p_ctx.getRuleIndex]
    end

    # Prints a constructor or method signature, from the first token of the
    # enclosing classBodyDeclaration to the last token of the signature.
    def print_member_signature(p_ctx)
        declaration = parent(p_ctx)       # constructorDeclaration / methodDeclaration
        member      = parent(declaration) # member
        body_decl   = parent(member)      # classBodyDeclaration
        start  = body_decl.getStart
        stop   = p_ctx.getStop
        puts "#{indent}#{@tokens.getText(start, stop)}"
    end
end # class JavaListener

===== file JavaListenerGen.java ====

import org.antlr.v4.runtime.CommonTokenStream;
import org.jruby.embed.ScriptingContainer;

/**
 * Java-side listener that forwards selected parse-tree events to a JRuby
 * {@code JavaListener} object living in an embedded scripting container.
 */
public class JavaListenerGen extends JavaBaseListener {
    /** The Ruby listener instance that receives the forwarded events. */
    Object my_interface;
    /** Embedded JRuby runtime hosting the Ruby listener. */
    ScriptingContainer ruby_embedded;
    /** Token stream handed on to the Ruby side by {@link #setSource}. */
    CommonTokenStream tokens;

    /**
     * Starts the embedded runtime, loads the Ruby listener script and
     * instantiates the Ruby listener.
     *
     * @param p_tokens token stream that was fed to the parser
     */
    public JavaListenerGen(CommonTokenStream p_tokens) {
        System.out.println("--- JavaListenerGen 1434");
        this.tokens = p_tokens;
        this.ruby_embedded = new ScriptingContainer();
        this.ruby_embedded.runScriptlet("require 'javaListener.rb'");
        System.out.println("--- about to JavaListener.new");
        this.my_interface = this.ruby_embedded.runScriptlet("JavaListener.new");
    }

    /** Invokes the named method on the Ruby listener with the given arguments. */
    private void forward(String rubyMethod, Object... rubyArgs) {
        ruby_embedded.callMethod(my_interface, rubyMethod, rubyArgs);
    }

    /** Hands the lexer, the token stream and the parser to the Ruby listener. */
    void setSource(Object p_lexer, Object p_parser) {
        forward("setSource", p_lexer, tokens, p_parser);
    }

    /** Forwarded without arguments; the context is not passed on. */
    public void enterClassDeclaration(JavaParser.ClassDeclarationContext ctx) {
        forward("enterClassDeclaration");
    }

    public void exitClassDeclaration(JavaParser.ClassDeclarationContext ctx) {
        forward("exitClassDeclaration", ctx);
    }

    public void enterClassSignature(JavaParser.ClassSignatureContext ctx) {
        forward("enterClassSignature", ctx);
    }

    public void exitConstructorSignature(JavaParser.ConstructorSignatureContext ctx) {
        forward("exitConstructorSignature", ctx);
    }

    public void exitMethodSignature(JavaParser.MethodSignatureContext ctx) {
        forward("exitMethodSignature", ctx);
    }
}

===== file ParseJava.java =====

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;

import java.io.FileInputStream;
import java.io.InputStream;

/**
 * Command-line driver: parses a Java source (the file named by the first
 * argument, or standard input when no argument is given) and walks the
 * resulting tree with a {@code JavaListenerGen}, which relays the events to
 * the JRuby listener.
 */
public class ParseJava {
    /**
     * @param args optional: path of the Java file to parse
     * @throws Exception if the input cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        final String input_file = (args.length > 0) ? args[0] : null;

        // Fall back to standard input when no file name was supplied.
        final InputStream source =
            (input_file == null) ? System.in : new FileInputStream(input_file);

        System.out.println("ParseJava.java 1431, input_file=" + input_file);

        // Characters -> tokens -> parse tree.
        final ANTLRInputStream chars = new ANTLRInputStream(source);
        final JavaLexer javaLexer = new JavaLexer(chars);
        final CommonTokenStream tokenStream = new CommonTokenStream(javaLexer);
        final JavaParser javaParser = new JavaParser(tokenStream);
        final ParseTree root = javaParser.compilationUnit();
        System.out.println("<<< parsing finished");

        // Java listener whose callbacks are relayed to the JRuby listener.
        final JavaListenerGen bridge = new JavaListenerGen(tokenStream);
        bridge.setSource(javaLexer, javaParser);

        // Walk the finished tree; the walker fires the listener callbacks.
        new ParseTreeWalker().walk(bridge, root);
        System.out.println();
    }
}

===== Execution =====

$ java ParseJava tsig.java 
ParseJava.java 1431, input_file=tsig.java
----- grammar Java last update 1359
<<< parsing finished
--- JavaListenerGen 1434
--- about to JavaListener.new
in JRuby JavaListener 1427
JavaListener#setSource, tokens size=245
public class BisonCParser extends Parser
    public static class Y_parser_ruleContext extends ParserRuleContext
        public TerminalNode ID()
        public Y_parser_ruleContext(ParserRuleContext parent, int state)
        @Override public int getRuleIndex()
        @Override
        public void enterRule(ParseTreeListener listener)
        @Override
        public void exitRule(ParseTreeListener listener)
    public final Y_parser_ruleContext y_parser_rule()
  You must be logged in to comment