Sophie

Sophie

distrib > Mandriva > 2010.0 > i586 > media > contrib-release > by-pkgid > d8a50ad1f2c695175a86b4ac856fd8af > files > 128

eclipse-quickrex-3.5.0-0.8.3mdv2010.0.i586.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">

<html>
  <head>
    <meta name="generator" content="HTML Tidy, see www.w3.org">
    <meta http-equiv="Content-Type" content=
    "text/html; charset=iso-8859-1">
		<LINK REL="STYLESHEET" HREF="book.css" CHARSET="ISO-8859-1" TYPE="text/css">

    <title>How QuickREx evaluates the Regular Expressions</title>
  </head>

  <body>
<!--
<h2>Table of Contents</h2>

<A href="using.html">Using the QuickREx Plugin</A>
<ul>
	<li><A href="quickRExView.html">Using the QuickREx View for testing and evaluating regular expressions</A></li>
	<ul>
	  <li><A href="persistence.html">Saving, Loading and Organizing Regular Expressions and Test-Texts</A></li>
		<li><b>How QuickREx evaluates the Regular Expressions</b></li>
	</ul>		
	<li><A href="reLibView.html">Using the Reg. Exp. Library View for keeping and organizing regular expressions</A></li>
	<li><A href="settings.html">Changing Color- and Font-settings</A></li>
	<li><A href="completions.html">Customizing TAB-completion</A></li>
</ul>
-->
    <h1>How QuickREx evaluates the Regular Expressions</h1>

		<p>QuickREx offers the most widely used implementations of Regular Expressions in the Java-world:</p>
		<ul>
		<li>JDK Regular Expressions (available since 1.4) - see <A href="http://java.sun.com" target="_blank">java.sun.com</A></li>
		<li>Jakarta ORO Perl-style Regular Expressions (ORO-Version 2.0.8, supports Perl 5.003 Regular Expressions) - see <A href="http://jakarta.apache.org/oro/index.html" target="_blank">jakarta.apache.org/oro/index.html</A></li>
		<li>Jakarta ORO Awk-style Regular Expressions (ORO-Version 2.0.8) - see <A href="http://jakarta.apache.org/oro/index.html" target="_blank">jakarta.apache.org/oro/index.html</A></li>
		<li>JRegex Regular Expressions (Version 1.2_01, supports Perl 5.6 Regular Expressions) - see <A href="http://jregex.sourceforge.net/" target="_blank">jregex.sourceforge.net/</A></li>
		<li>Jakarta-Regexp Regular Expressions (Version 1.4) - see <A href="http://jakarta.apache.org/regexp/index.html" target="_blank">jakarta.apache.org/regexp/index.html</A></li>
		</ul>
		
		<p>Since the APIs of the variants are slightly different, a common abstraction
		is used to hold information about matches and groups. The following interface abstracts
		a regular expression evaluated against a specific text and leading to a number of matches and groups:</p>
		
		<pre>
package de.babe.eclipse.plugins.quickREx.regexp;

/**
 * A cursor over the matches that a regular expression produces
 * when it is evaluated against one specific text.
 *
 * The cursor is advanced with {@link #nextMatch()}; every other
 * method reports on the match the cursor currently points to.
 *
 * @author bastian.bergerhoff
 */
public interface MatchSet {

  /**
   * Advances to the next match, if there is one. Works like
   * {@code next()} on an enumeration: after a successful call the
   * whole state of this instance is centered around the new match.
   *
   * @return {@code true} if and only if there is a next match
   */
  boolean nextMatch();

  /**
   * Reports where the current match begins.
   *
   * @return the start-offset of the current match
   */
  int start();

  /**
   * Reports where the current match ends.
   *
   * @return the end-offset of the current match
   */
  int end();

  /**
   * Counts the groups of the current match. The match itself is not
   * counted as a group, so 0 is returned when there are no groups.
   *
   * @return the number of groups in the current match
   */
  int groupCount();

  /**
   * Looks up the text captured by one group of the current match.
   *
   * @param groupIndex the index of the group
   * @return the String-contents of the group
   */
  String groupContents(int groupIndex);

  /**
   * Reports where one group of the current match begins.
   *
   * @param groupIndex the index of the group
   * @return the start-offset of the group
   */
  int groupStart(int groupIndex);

  /**
   * Reports where one group of the current match ends.
   *
   * @param groupIndex the index of the group
   * @return the end-offset of the group
   */
  int groupEnd(int groupIndex);

}
		</pre>

		<p>There is an implementation for the JDK-variant and an abstract base-implementation plus
		two concrete implementations for the ORO-variants. The latter two differ only in their constructors,
		where either an Awk- or a Perl-compiler is used as requested. As an example implementation, consider the
		JDK-variant:</p>
		
		<pre>
package de.babe.eclipse.plugins.quickREx.regexp.jdk;

import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.babe.eclipse.plugins.quickREx.regexp.Flag;
import de.babe.eclipse.plugins.quickREx.regexp.MatchSet;

/**
 * MatchSet using JDK-regular expressions.
 *
 * One instance wraps a single java.util.regex.Matcher that is created
 * in the constructor; every MatchSet-method delegates to that matcher.
 * 
 * @author bastian.bergerhoff, andreas.studer
 */
public class JavaMatchSet implements MatchSet {

  private final Pattern pattern;
  private final Matcher matcher;

  /**
   * All compiler-flags of the JDK-implementation. Kept read-only because
   * the very same instance is handed out by getAllFlags().
   */
  private static final Collection ALL_FLAGS;
  
  static {
    final Collection known = new Vector();
    known.add(JavaFlag.JDK_CANON_EQ);
    known.add(JavaFlag.JDK_CASE_INSENSITIVE);
    known.add(JavaFlag.JDK_COMMENTS);
    known.add(JavaFlag.JDK_DOTALL);
    known.add(JavaFlag.JDK_MULTILINE);
    known.add(JavaFlag.JDK_UNICODE_CASE);
    known.add(JavaFlag.JDK_UNIX_LINES);
    ALL_FLAGS = Collections.unmodifiableCollection(known);
  }
  
  /**
   * Returns a Collection of all Compiler-Flags the JDK-implementation
   * knows about. The returned Collection is a read-only view: attempts
   * to modify it throw an UnsupportedOperationException.
   * 
   * @return an unmodifiable Collection of all Compiler-Flags the
   * JDK-implementation knows about
   */
  public static Collection getAllFlags() {
    return ALL_FLAGS;
  }
  
  /**
   * The constructor - uses JDK-regular expressions
   * to evaluate the passed regular expression against
   * the passed text.
   * 
   * @param regExp the regular expression
   * @param text the text to evaluate regExp against
   * @param flags a Collection of Flags to pass to the Compiler;
   *        null is treated like an empty Collection (no flags)
   * @throws java.util.regex.PatternSyntaxException if regExp cannot be compiled
   * @throws ClassCastException if flags contains an element that is not a Flag
   */
  public JavaMatchSet(String regExp, String text, Collection flags) {
    // Combine the individual flags into the bit mask Pattern.compile expects.
    int iFlags = 0;
    if (flags != null) {
      for (Iterator iter = flags.iterator(); iter.hasNext();) {
        final Flag element = (Flag)iter.next();
        iFlags |= element.getFlag();
      }
    }
    pattern = Pattern.compile(regExp, iFlags);
    matcher = pattern.matcher(text);
  }

  /**
   * Moves on to the next match by calling find() on the matcher.
   *
   * @return true if and only if another match was found
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#nextMatch()
   */
  public boolean nextMatch() {
    return matcher.find();
  }

  /**
   * Returns the start-offset of the current match.
   *
   * @return the start-offset of the current match
   * @throws IllegalStateException if there is no current match
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#start()
   */
  public int start() {
    return matcher.start();
  }

  /**
   * Returns the end-offset of the current match.
   *
   * @return the end-offset of the current match
   * @throws IllegalStateException if there is no current match
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#end()
   */
  public int end() {
    return matcher.end();
  }

  /**
   * Returns the number of capturing groups as reported by the matcher.
   *
   * @return the number of groups, not counting the match itself
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#groupCount()
   */
  public int groupCount() {
    return matcher.groupCount();
  }

  /**
   * Returns the text captured by the group with the passed index.
   *
   * @param groupIndex the index of the group
   * @return the contents of the group; the JDK-matcher returns null
   *         for a group that did not take part in the current match
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#groupContents(int)
   */
  public String groupContents(int groupIndex) {
    return matcher.group(groupIndex);
  }

  /**
   * Returns the start-offset of the group with the passed index.
   *
   * @param groupIndex the index of the group
   * @return the start-offset of the group; the JDK-matcher returns -1
   *         for a group that did not take part in the current match
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#groupStart(int)
   */
  public int groupStart(int groupIndex) {
    return matcher.start(groupIndex);
  }

  /**
   * Returns the end-offset of the group with the passed index.
   *
   * @param groupIndex the index of the group
   * @return the end-offset of the group; the JDK-matcher returns -1
   *         for a group that did not take part in the current match
   * @see de.babe.eclipse.plugins.quickREx.regexp.MatchSet#groupEnd(int)
   */
  public int groupEnd(int groupIndex) {
    return matcher.end(groupIndex);
  }
}
		</pre>
		
		<p>A MatchSet is then iterated to collect the matches and, for each match, its groups:</p>
		
		<pre>
// Evaluate the expression against the test-text with the flavour selected in the plugin.
MatchSet matches = MatchSetFactory.createMatchSet(
	QuickRExPlugin.getDefault().getREFlavour(),
	p_RegExp,
	p_testText,
	flags);
matchData = new Vector();
while (matches.nextMatch()) {
	// One Match per hit, spanning the offsets the MatchSet reports for it.
	Match currentMatch = new Match(matches.start(), matches.end());
	for (int g = 0; g &lt; matches.groupCount(); g++) {
		// The loop counts from 0, the MatchSet is queried with indices starting at 1.
		int groupIndex = g + 1;
		String contents = matches.groupContents(groupIndex);
		int groupStart = matches.groupStart(groupIndex);
		int groupEnd = matches.groupEnd(groupIndex);
		currentMatch.addGroup(new Group(groupIndex, contents, groupStart, groupEnd));
	}
	matchData.add(currentMatch);
}
		</pre>

		<p>where <strong>Match</strong> and <strong>Group</strong> are abstractions of matches and groups.</p>		
				
  </body>
</html>