Added the ability for the HTML checker to keep track of internal and external
references in any post, so that we can do trackbacks.
This commit is contained in:
		
							parent
							
								
									7e72ec21d0
								
							
						
					
					
						commit
						4c5c7ffe85
					
				@ -9,9 +9,9 @@
 | 
			
		||||
 * 
 | 
			
		||||
 * The Original Code is the Venice Web Communities System.
 | 
			
		||||
 * 
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 | 
			
		||||
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 | 
			
		||||
 * Copyright (C) 2001-02 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * 
 | 
			
		||||
 * Contributor(s): 
 | 
			
		||||
 */
 | 
			
		||||
@ -40,7 +40,7 @@ public class PostLinkRewriter implements Rewriter
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  private GlobalSite globalsite;         // global site containing utilities
 | 
			
		||||
  private final GlobalSite globalsite;         // global site containing utilities
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Constructor
 | 
			
		||||
@ -60,7 +60,7 @@ public class PostLinkRewriter implements Rewriter
 | 
			
		||||
 | 
			
		||||
  private static final String buildPostLink(PostLinkDecoder pl, PostLinkDecoderContext ctxt)
 | 
			
		||||
  {
 | 
			
		||||
    StringBuffer b = new StringBuffer(URI_PREFIX);
 | 
			
		||||
    StringBuffer b = new StringBuffer();
 | 
			
		||||
    boolean started = false;
 | 
			
		||||
    if (pl.getCommunity()==null)
 | 
			
		||||
      b.append(ctxt.getCommunityName());
 | 
			
		||||
@ -180,14 +180,18 @@ public class PostLinkRewriter implements Rewriter
 | 
			
		||||
 | 
			
		||||
    } // end catch
 | 
			
		||||
 | 
			
		||||
    // build the post link and add it as an internal reference
 | 
			
		||||
    String link = buildPostLink(pl,ctxt);
 | 
			
		||||
    svc.addInternalReference(link);
 | 
			
		||||
 | 
			
		||||
    // build the necessary markup and return it
 | 
			
		||||
    StringBuffer open_a = new StringBuffer("<A HREF=\"");
 | 
			
		||||
    open_a.append(buildPostLink(pl,ctxt)).append("\"");
 | 
			
		||||
    StringBuffer open_a = new StringBuffer("<a href=\"");
 | 
			
		||||
    open_a.append(URI_PREFIX).append(link).append("\"");
 | 
			
		||||
    String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
 | 
			
		||||
    if (!(StringUtil.isStringEmpty(catenate)))
 | 
			
		||||
      open_a.append(' ').append(catenate);
 | 
			
		||||
    open_a.append('>');
 | 
			
		||||
    return new MarkupData(open_a.toString(),data,"</A>");
 | 
			
		||||
    return new MarkupData(open_a.toString(),data,"</a>");
 | 
			
		||||
 | 
			
		||||
  } // end rewrite
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -17,6 +17,8 @@
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck;
 | 
			
		||||
 | 
			
		||||
import java.util.Set;
 | 
			
		||||
 | 
			
		||||
public interface HTMLChecker
 | 
			
		||||
{
 | 
			
		||||
  public void append(String str) throws AlreadyFinishedException;
 | 
			
		||||
@ -37,4 +39,8 @@ public interface HTMLChecker
 | 
			
		||||
 | 
			
		||||
  public void setContextValue(String name, Object val);
 | 
			
		||||
 | 
			
		||||
  public Set getExternalReferences() throws NotYetFinishedException;
 | 
			
		||||
 | 
			
		||||
  public Set getInternalReferences() throws NotYetFinishedException;
 | 
			
		||||
 | 
			
		||||
} // end interface HTMLChecker
 | 
			
		||||
 | 
			
		||||
@ -9,18 +9,24 @@
 | 
			
		||||
 * 
 | 
			
		||||
 * The Original Code is the Venice Web Community System.
 | 
			
		||||
 * 
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 | 
			
		||||
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 | 
			
		||||
 * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * 
 | 
			
		||||
 * Contributor(s): 
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck;
 | 
			
		||||
 | 
			
		||||
import java.net.URL;
 | 
			
		||||
 | 
			
		||||
public interface RewriterServices
 | 
			
		||||
{
 | 
			
		||||
  public abstract String getRewriterAttrValue(String name);
 | 
			
		||||
  public String getRewriterAttrValue(String name);
 | 
			
		||||
 | 
			
		||||
  public abstract Object getRewriterContextValue(String name);
 | 
			
		||||
  public Object getRewriterContextValue(String name);
 | 
			
		||||
 | 
			
		||||
  public void addExternalReference(URL ref);
 | 
			
		||||
 | 
			
		||||
  public void addInternalReference(String ref);
 | 
			
		||||
 | 
			
		||||
} // end interface RewriterServices
 | 
			
		||||
 | 
			
		||||
@ -9,21 +9,33 @@
 | 
			
		||||
 * 
 | 
			
		||||
 * The Original Code is the Venice Web Community System.
 | 
			
		||||
 * 
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 | 
			
		||||
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 | 
			
		||||
 * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * 
 | 
			
		||||
 * Contributor(s): 
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck.filters;
 | 
			
		||||
 | 
			
		||||
import java.util.*;
 | 
			
		||||
import org.apache.log4j.Logger;
 | 
			
		||||
import org.apache.regexp.*;
 | 
			
		||||
import com.silverwrist.util.*;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.Rewriter;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.RewriterServices;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.MarkupData;
 | 
			
		||||
 | 
			
		||||
public class EmailRewriter implements Rewriter
 | 
			
		||||
{
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Static data members
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  private static Logger logger = Logger.getLogger(EmailRewriter.class);
 | 
			
		||||
 | 
			
		||||
  private static REProgram s_match = null;
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Constructor
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
@ -46,23 +58,42 @@ public class EmailRewriter implements Rewriter
 | 
			
		||||
 | 
			
		||||
  public MarkupData rewrite(String data, RewriterServices svc)
 | 
			
		||||
  {
 | 
			
		||||
    int at_pos = data.indexOf('@');
 | 
			
		||||
    if ((at_pos<=0) || (at_pos==(data.length()-1)))
 | 
			
		||||
      return null;
 | 
			
		||||
 | 
			
		||||
    // TODO: put in more validation checking
 | 
			
		||||
    RE m = new RE(s_match,RE.MATCH_CASEINDEPENDENT);
 | 
			
		||||
    if (!(m.match(data)))
 | 
			
		||||
      return null;  // not a valid E-mail address
 | 
			
		||||
 | 
			
		||||
    // build the <A> tag (the gnarliest part)
 | 
			
		||||
    StringBuffer open_a = new StringBuffer("<A HREF=\"mailto:");
 | 
			
		||||
    StringBuffer open_a = new StringBuffer("<a href=\"mailto:");
 | 
			
		||||
    open_a.append(data).append("\"");
 | 
			
		||||
    String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
 | 
			
		||||
    if ((catenate!=null) && (catenate.length()>0))
 | 
			
		||||
    if (!(StringUtil.isStringEmpty(catenate)))
 | 
			
		||||
      open_a.append(' ').append(catenate);
 | 
			
		||||
    open_a.append('>');
 | 
			
		||||
 | 
			
		||||
    // return the markup data back to the checker
 | 
			
		||||
    return new MarkupData(open_a.toString(),data,"</A>");
 | 
			
		||||
    return new MarkupData(open_a.toString(),data,"</a>");
 | 
			
		||||
 | 
			
		||||
  } // end rewrite
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Static initializer
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  static
  {
    try
    { // compile our regular expression
      // The '-' in the local-part character class is escaped so that it is a literal hyphen.
      // Written unescaped as "+-/" it formed a range running from '+' to '/', which also
      // admitted ',' as a valid local-part character.
      // NOTE(review): the trailing '-' in the host-name classes is left as written; confirm that
      // the regexp library in use treats a trailing '-' as a literal rather than an open range.
      RECompiler compiler = new RECompiler();
      s_match = compiler.compile("^[A-Za-z0-9!#$%*+\\-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$");

    } // end try
    catch (RESyntaxException e)
    { // shouldn't happen
      logger.fatal("caught RESyntaxException in EmailRewriter initializer",e);

    } // end catch

  } // end static initializer
 | 
			
		||||
 | 
			
		||||
} // end class EmailRewriter
 | 
			
		||||
 | 
			
		||||
@ -9,29 +9,87 @@
 | 
			
		||||
 * 
 | 
			
		||||
 * The Original Code is the Venice Web Community System.
 | 
			
		||||
 * 
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 | 
			
		||||
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 | 
			
		||||
 * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * 
 | 
			
		||||
 * Contributor(s): 
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck.filters;
 | 
			
		||||
 | 
			
		||||
import java.net.*;
 | 
			
		||||
import java.util.*;
 | 
			
		||||
import org.apache.log4j.Logger;
 | 
			
		||||
import org.apache.regexp.*;
 | 
			
		||||
import com.silverwrist.util.*;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.Rewriter;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.RewriterServices;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.MarkupData;
 | 
			
		||||
 | 
			
		||||
public class URLRewriter implements Rewriter
 | 
			
		||||
{
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Internal class containing URL elements.
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  private static class URLElement
 | 
			
		||||
  {
 | 
			
		||||
    private REProgram m_match;
 | 
			
		||||
    private String m_prefix;
 | 
			
		||||
 | 
			
		||||
    URLElement(String pattern, String prefix)
 | 
			
		||||
    {
 | 
			
		||||
      try
 | 
			
		||||
      { // fill the classes
 | 
			
		||||
	m_match = COMPILER.compile(pattern);
 | 
			
		||||
	m_prefix = prefix;
 | 
			
		||||
 | 
			
		||||
      } // end try
 | 
			
		||||
      catch (RESyntaxException e)
 | 
			
		||||
      { // shouldn't happen
 | 
			
		||||
	logger.fatal("got RESyntaxException in URLElement",e);
 | 
			
		||||
 | 
			
		||||
      } // end catch
 | 
			
		||||
 | 
			
		||||
    } // end constructor
 | 
			
		||||
 | 
			
		||||
    String eval(String input)
 | 
			
		||||
    {
 | 
			
		||||
      RE m = new RE(m_match,RE.MATCH_CASEINDEPENDENT);
 | 
			
		||||
      if (m.match(input))
 | 
			
		||||
	return m_prefix + input;
 | 
			
		||||
      else
 | 
			
		||||
	return null;
 | 
			
		||||
 | 
			
		||||
    } // end eval
 | 
			
		||||
 | 
			
		||||
  } // end class URLElement
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Static data members
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  private static final String NULLSTRING = "";
 | 
			
		||||
  private static Hashtable prefix_list = null;
 | 
			
		||||
  private static boolean set_up = true;
 | 
			
		||||
  private static Logger logger = Logger.getLogger(URLRewriter.class);
 | 
			
		||||
 | 
			
		||||
  private static final RECompiler COMPILER = new RECompiler();
 | 
			
		||||
  private static final String[] SETUP_DATA =
 | 
			
		||||
    {
 | 
			
		||||
      "^http://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                 "",
 | 
			
		||||
      "^ftp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                  "",
 | 
			
		||||
      "^gopher://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                               "",
 | 
			
		||||
      "^mailto:[A-Za-z0-9!#$%*+-/=?^_`{|}~.]+@[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$", "",
 | 
			
		||||
      "^news:[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+$",                                  "",
 | 
			
		||||
      "^nntp://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                                 "",
 | 
			
		||||
      "^telnet://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                               "",
 | 
			
		||||
      "^tn3270://[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)+",                               "",
 | 
			
		||||
      "^www\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*",                                  "http://",
 | 
			
		||||
      "^ftp\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*",                                  "ftp://",
 | 
			
		||||
      "^gopher\\.[A-Za-z0-9_-]+(?:\\.[A-Za-z0-9_-]+)*",                               "gopher://"
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  private static final List KNOWN_ELEMENTS;
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Constructor
 | 
			
		||||
@ -39,40 +97,9 @@ public class URLRewriter implements Rewriter
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  public URLRewriter()
 | 
			
		||||
  {
 | 
			
		||||
    setUpPrefixes();  // make sure the prefix data is set up
 | 
			
		||||
 | 
			
		||||
  { // do nothing
 | 
			
		||||
  } // end constructor
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Internal functions
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  private static void setUpPrefixes()
 | 
			
		||||
  {
 | 
			
		||||
    if (set_up)
 | 
			
		||||
    { // allocate the hash table
 | 
			
		||||
      set_up = false;
 | 
			
		||||
      prefix_list = new Hashtable(10,0.9F);
 | 
			
		||||
 | 
			
		||||
      // fill it with the proper URL prefixes
 | 
			
		||||
      prefix_list.put("http:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("ftp:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("gopher:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("mailto:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("news:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("nntp:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("telnet:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("tn3270:",NULLSTRING);
 | 
			
		||||
      prefix_list.put("www.",new String("http://"));
 | 
			
		||||
      prefix_list.put("ftp.",new String("ftp://"));
 | 
			
		||||
      prefix_list.put("gopher.",new String("gopher://"));
 | 
			
		||||
 | 
			
		||||
    } // end if
 | 
			
		||||
 | 
			
		||||
  } // end setUpPrefixes
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Implementations from interface Rewriter
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
@ -86,29 +113,53 @@ public class URLRewriter implements Rewriter
 | 
			
		||||
 | 
			
		||||
  public MarkupData rewrite(String data, RewriterServices svc)
 | 
			
		||||
  {
 | 
			
		||||
    Enumeration prefixes = prefix_list.keys();
 | 
			
		||||
    while (prefixes.hasMoreElements())
 | 
			
		||||
    { // get the next prefix and compare against the beginning of the string
 | 
			
		||||
      String pfx = (String)(prefixes.nextElement());
 | 
			
		||||
      if (data.regionMatches(true,0,pfx,0,pfx.length()))
 | 
			
		||||
      { // good enough!  build the open <A> tag (the gnarliest part of the markup)
 | 
			
		||||
	StringBuffer open_a = new StringBuffer("<A HREF=\"");
 | 
			
		||||
	String catenate = (String)(prefix_list.get(pfx));
 | 
			
		||||
	open_a.append(catenate).append(data).append("\"");
 | 
			
		||||
	catenate = svc.getRewriterAttrValue("ANCHORTAIL");
 | 
			
		||||
	if ((catenate!=null) && (catenate.length()>0))
 | 
			
		||||
    for (Iterator it=KNOWN_ELEMENTS.iterator(); it.hasNext(); )
 | 
			
		||||
    { // test each element in turn
 | 
			
		||||
      URLElement ue = (URLElement)(it.next());
 | 
			
		||||
      String s = ue.eval(data);
 | 
			
		||||
      if (s!=null)
 | 
			
		||||
      { // got a match!  record the external reference and build the open <A> tag
 | 
			
		||||
	try
 | 
			
		||||
	{ // create URL and add it
 | 
			
		||||
	  if (s.toLowerCase().startsWith("http:"))
 | 
			
		||||
	    svc.addExternalReference(new URL(s));
 | 
			
		||||
 | 
			
		||||
	} // end try
 | 
			
		||||
	catch (MalformedURLException e)
 | 
			
		||||
	{ // forget it
 | 
			
		||||
	} // end catch
 | 
			
		||||
 | 
			
		||||
	StringBuffer open_a = new StringBuffer("<a href=\"");
 | 
			
		||||
	open_a.append(s).append("\"");
 | 
			
		||||
	String catenate = svc.getRewriterAttrValue("ANCHORTAIL");
 | 
			
		||||
	if (!(StringUtil.isStringEmpty(catenate)))
 | 
			
		||||
	  open_a.append(' ').append(catenate);
 | 
			
		||||
	open_a.append('>');
 | 
			
		||||
 | 
			
		||||
	// here's how you mark it up!
 | 
			
		||||
	return new MarkupData(open_a.toString(),data,"</A>");
 | 
			
		||||
	return new MarkupData(open_a.toString(),data,"</a>");
 | 
			
		||||
 | 
			
		||||
      } // end if
 | 
			
		||||
 | 
			
		||||
    } // end while
 | 
			
		||||
    } // end for
 | 
			
		||||
 | 
			
		||||
    return null;  // sorry, no can do
 | 
			
		||||
 | 
			
		||||
  } // end rewrite
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Static initializer
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  static
 | 
			
		||||
  {
 | 
			
		||||
    ArrayList tmp = new ArrayList();
 | 
			
		||||
    for (int i=0; i<SETUP_DATA.length; i+=2)
 | 
			
		||||
      tmp.add(new URLElement(SETUP_DATA[i],SETUP_DATA[i + 1]));
 | 
			
		||||
    tmp.trimToSize();
 | 
			
		||||
    KNOWN_ELEMENTS = Collections.unmodifiableList(tmp);
 | 
			
		||||
 | 
			
		||||
  } // end static initializer
 | 
			
		||||
 | 
			
		||||
} // end class URLRewriter
 | 
			
		||||
 | 
			
		||||
@ -9,20 +9,26 @@
 | 
			
		||||
 * 
 | 
			
		||||
 * The Original Code is the Venice Web Community System.
 | 
			
		||||
 * 
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 | 
			
		||||
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 | 
			
		||||
 * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * 
 | 
			
		||||
 * Contributor(s): 
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck.impl;
 | 
			
		||||
 | 
			
		||||
import java.net.URL;
 | 
			
		||||
 | 
			
		||||
public interface HTMLCheckerBackend
 | 
			
		||||
{
 | 
			
		||||
  public abstract String getCheckerAttrValue(String name);
 | 
			
		||||
  public String getCheckerAttrValue(String name);
 | 
			
		||||
 | 
			
		||||
  public abstract void sendTagMessage(String msg);
 | 
			
		||||
  public void sendTagMessage(String msg);
 | 
			
		||||
 | 
			
		||||
  public abstract Object getCheckerContextValue(String name);
 | 
			
		||||
  public Object getCheckerContextValue(String name);
 | 
			
		||||
 | 
			
		||||
  public void addExternalReference(URL ref);
 | 
			
		||||
 | 
			
		||||
  public void addInternalReference(String ref);
 | 
			
		||||
 | 
			
		||||
} // end interface HTMLCheckerBackend
 | 
			
		||||
 | 
			
		||||
@ -17,6 +17,7 @@
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck.impl;
 | 
			
		||||
 | 
			
		||||
import java.net.URL;
 | 
			
		||||
import java.util.*;
 | 
			
		||||
import org.apache.log4j.*;
 | 
			
		||||
import com.silverwrist.venice.htmlcheck.*;
 | 
			
		||||
@ -139,6 +140,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
  private ArrayList m_tag_rewriters = new ArrayList();      // tag rewriter instances
 | 
			
		||||
  private ArrayList m_paren_rewriters = new ArrayList();    // paren rewriter instances
 | 
			
		||||
  private HashMap m_context_data = new HashMap();   // context variables
 | 
			
		||||
  private HashSet m_external_references = new HashSet();  // list of external references
 | 
			
		||||
  private HashSet m_internal_references = new HashSet();  // list of internal references
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Constructor
 | 
			
		||||
@ -161,7 +164,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Returns <CODE>true</CODE> if this character belongs as part of a word, <CODE>false</CODE> if not.
 | 
			
		||||
   * Returns <code>true</code> if this character belongs as part of a word, <code>false</code> if not.
 | 
			
		||||
   *
 | 
			
		||||
   * @param ch Character to be tested.
 | 
			
		||||
   * @return See above.
 | 
			
		||||
@ -210,8 +213,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
  } // end getRunLength
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Copies the <CODE>Rewriter</CODE> objects from an outside list to an internal list, wrapping
 | 
			
		||||
   * named rewriters in <CODE>CountingRewriter</CODE> objects as appropriate.
 | 
			
		||||
   * Copies the <code>Rewriter</code> objects from an outside list to an internal list, wrapping
 | 
			
		||||
   * named rewriters in <code>CountingRewriter</code> objects as appropriate.
 | 
			
		||||
   *
 | 
			
		||||
   * @param dest Destination to copy rewriters to.
 | 
			
		||||
   * @param source List to copy rewriters from.
 | 
			
		||||
@ -290,8 +293,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
   *
 | 
			
		||||
   * @param ch Character to output.
 | 
			
		||||
   * @param filters List of filters to use to attempt to process the character.
 | 
			
		||||
   * @param count_cols <CODE>true</CODE> if the character output adds to the column counter,
 | 
			
		||||
   *                   <CODE>false</CODE> if not.
 | 
			
		||||
   * @param count_cols <code>true</code> if the character output adds to the column counter,
 | 
			
		||||
   *                   <code>false</code> if not.
 | 
			
		||||
   */
 | 
			
		||||
  private final void emitChar(char ch, List filters, boolean count_cols)
 | 
			
		||||
  {
 | 
			
		||||
@ -841,7 +844,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
  } // end handleAsHTML
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Returns <CODE>true</CODE> if the temporary buffer contains the start of an HTML comment.  (The
 | 
			
		||||
   * Returns <code>true</code> if the temporary buffer contains the start of an HTML comment.  (The
 | 
			
		||||
   * leading and trailing angle brackets are assumed.)
 | 
			
		||||
   *
 | 
			
		||||
   * @return See above.
 | 
			
		||||
@ -853,7 +856,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
  } // end containsHTMLComment
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Returns <CODE>true</CODE> if the temporary buffer contains a complete HTML comment.  (The leading
 | 
			
		||||
   * Returns <code>true</code> if the temporary buffer contains a complete HTML comment.  (The leading
 | 
			
		||||
   * and trailing angle brackets are assumed.)
 | 
			
		||||
   *
 | 
			
		||||
   * @return See above.
 | 
			
		||||
@ -869,7 +872,7 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
  } // end containsCompleteHTMLComment
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Returns <CODE>true</CODE> if the temporary buffer contains an XML construct, i.e. a tag that
 | 
			
		||||
   * Returns <code>true</code> if the temporary buffer contains an XML construct, i.e. a tag that
 | 
			
		||||
   * contains a ':', and may or may not have a leading '/'.  (The leading and trailing angle brackets
 | 
			
		||||
   * are assumed.)
 | 
			
		||||
   *
 | 
			
		||||
@ -1381,6 +1384,8 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
    m_lines = 0;
 | 
			
		||||
    m_paren_level = 0;
 | 
			
		||||
    m_output_buffer = null;
 | 
			
		||||
    m_external_references.clear();
 | 
			
		||||
    m_internal_references.clear();
 | 
			
		||||
    killState();
 | 
			
		||||
 | 
			
		||||
    // Also reset all the counters.
 | 
			
		||||
@ -1441,6 +1446,28 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
 | 
			
		||||
  } // end setContextValue
 | 
			
		||||
 | 
			
		||||
  public Set getExternalReferences() throws NotYetFinishedException
 | 
			
		||||
  {
 | 
			
		||||
    if (!m_finished)
 | 
			
		||||
      throw new NotYetFinishedException();
 | 
			
		||||
    if (m_external_references.isEmpty())
 | 
			
		||||
      return Collections.EMPTY_SET;
 | 
			
		||||
    HashSet rc = new HashSet(m_external_references);
 | 
			
		||||
    return Collections.unmodifiableSet(rc);
 | 
			
		||||
 | 
			
		||||
  } // end getExternalReferences
 | 
			
		||||
 | 
			
		||||
  public Set getInternalReferences() throws NotYetFinishedException
 | 
			
		||||
  {
 | 
			
		||||
    if (!m_finished)
 | 
			
		||||
      throw new NotYetFinishedException();
 | 
			
		||||
    if (m_internal_references.isEmpty())
 | 
			
		||||
      return Collections.EMPTY_SET;
 | 
			
		||||
    HashSet rc = new HashSet(m_internal_references);
 | 
			
		||||
    return Collections.unmodifiableSet(rc);
 | 
			
		||||
 | 
			
		||||
  } // end getInternalReferences
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Implementations from interface HTMLCheckerBackend
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
@ -1488,6 +1515,18 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
 | 
			
		||||
  } // end getCheckerContextValue
 | 
			
		||||
 | 
			
		||||
  public void addExternalReference(URL ref)
 | 
			
		||||
  {
 | 
			
		||||
    m_external_references.add(ref);
 | 
			
		||||
 | 
			
		||||
  } // end addExternalReference
 | 
			
		||||
 | 
			
		||||
  public void addInternalReference(String ref)
 | 
			
		||||
  {
 | 
			
		||||
    m_internal_references.add(ref);
 | 
			
		||||
 | 
			
		||||
  } // end addInternalReference
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Implementations from interface RewriterServices
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
@ -1505,4 +1544,6 @@ class HTMLCheckerImpl implements HTMLChecker, HTMLCheckerBackend, RewriterServic
 | 
			
		||||
 | 
			
		||||
  } // end getRewriterContextValue
 | 
			
		||||
 | 
			
		||||
  // addExternalReference and addInternalReference are implemented as part of HTMLCheckerBackend
 | 
			
		||||
 | 
			
		||||
} // end class HTMLCheckerImpl
 | 
			
		||||
 | 
			
		||||
@ -9,14 +9,19 @@
 | 
			
		||||
 * 
 | 
			
		||||
 * The Original Code is the Venice Web Community System.
 | 
			
		||||
 * 
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@silcom.com>,
 | 
			
		||||
 * The Initial Developer of the Original Code is Eric J. Bowersox <erbo@ricochet.com>,
 | 
			
		||||
 * for Silverwrist Design Studios.  Portions created by Eric J. Bowersox are
 | 
			
		||||
 * Copyright (C) 2001 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * Copyright (C) 2001-2004 Eric J. Bowersox/Silverwrist Design Studios.  All Rights Reserved.
 | 
			
		||||
 * 
 | 
			
		||||
 * Contributor(s): 
 | 
			
		||||
 */
 | 
			
		||||
package com.silverwrist.venice.htmlcheck.impl;
 | 
			
		||||
 | 
			
		||||
import java.net.*;
 | 
			
		||||
import org.apache.log4j.Logger;
 | 
			
		||||
import org.apache.regexp.*;
 | 
			
		||||
import com.silverwrist.util.*;
 | 
			
		||||
 | 
			
		||||
class TagA extends BalancedTag
 | 
			
		||||
{
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
@ -24,21 +29,94 @@ class TagA extends BalancedTag
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  private static final String TARGET_ATTR = "TARGET";
 | 
			
		||||
  /** The instance of {@link org.apache.log4j.Logger Logger} for use by this class. */
 | 
			
		||||
  private static Logger logger = Logger.getLogger(TagA.class);
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Attributes
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  /** Regular expression program to look for "HREF=" attribute. */
 | 
			
		||||
  private REProgram m_href = null;
 | 
			
		||||
 | 
			
		||||
  /** Regular expression program to look for "TARGET=" attribute. */
 | 
			
		||||
  private REProgram m_target = null;
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Constructor
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Creates a new instance of <code>TagA</code>.
 | 
			
		||||
   */
 | 
			
		||||
  TagA()
 | 
			
		||||
  {
 | 
			
		||||
    super("A",false);
 | 
			
		||||
    try
 | 
			
		||||
    { // compile regular expressions
 | 
			
		||||
      RECompiler compiler = new RECompiler();
 | 
			
		||||
      m_href = compiler.compile("href\\s*=");
 | 
			
		||||
      m_target = compiler.compile("target\\s*=");
 | 
			
		||||
 | 
			
		||||
    } // end try
 | 
			
		||||
    catch (RESyntaxException e)
 | 
			
		||||
    { // shouldn't happen
 | 
			
		||||
      logger.fatal("got RESyntaxException in TagA",e);
 | 
			
		||||
 | 
			
		||||
    } // end catch
 | 
			
		||||
 | 
			
		||||
  } // end constructor
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * External operations
 | 
			
		||||
   * Internal operations
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Extracts an attribute value from the start of the string.  The attribute value may be enclosed
 | 
			
		||||
   * in quotes, or may simply be a series of nonblank characters delimited by blanks.
 | 
			
		||||
   *
 | 
			
		||||
   * @param s The string to extract the attribute value from.
 | 
			
		||||
   * @return The attribute value extracted.
 | 
			
		||||
   */
 | 
			
		||||
  private static final String extractAttribute(String s)
 | 
			
		||||
  {
 | 
			
		||||
    char[] a = s.toCharArray();
 | 
			
		||||
    int i = 0;
 | 
			
		||||
    while ((i<a.length) && Character.isWhitespace(a[i]))
 | 
			
		||||
      i++;
 | 
			
		||||
    if (i==a.length)
 | 
			
		||||
      return "";
 | 
			
		||||
    int st = i;
 | 
			
		||||
    if ((a[st]=='\'') || (a[st]=='\"'))
 | 
			
		||||
    { // find quoted string boundaries
 | 
			
		||||
      i++;
 | 
			
		||||
      while ((i<a.length) && (a[i]!=a[st]))
 | 
			
		||||
	i++;
 | 
			
		||||
      if (i==a.length)
 | 
			
		||||
	return "";
 | 
			
		||||
      st++;
 | 
			
		||||
 | 
			
		||||
    } // end if
 | 
			
		||||
    else
 | 
			
		||||
    { // skip over non-whitespace
 | 
			
		||||
      while ((i<a.length) && !(Character.isWhitespace(a[i])))
 | 
			
		||||
	i++;
 | 
			
		||||
      // if i==a.length, just take the "rest"
 | 
			
		||||
 | 
			
		||||
    } // end else
 | 
			
		||||
 | 
			
		||||
    if (i==a.length)
 | 
			
		||||
      return s.substring(st);
 | 
			
		||||
    else
 | 
			
		||||
      return s.substring(st,i);
 | 
			
		||||
 | 
			
		||||
  } // end extractAttribute
 | 
			
		||||
 | 
			
		||||
  /*--------------------------------------------------------------------------------
 | 
			
		||||
   * Overrides from class SimpleTag
 | 
			
		||||
   *--------------------------------------------------------------------------------
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
@ -47,77 +125,44 @@ class TagA extends BalancedTag
 | 
			
		||||
    if (is_closing)
 | 
			
		||||
      return contents;  // don't bother checking close tags
 | 
			
		||||
 | 
			
		||||
    // Skip over the initial word of the tag data, as that's the tag name.
 | 
			
		||||
    int i = 0;
 | 
			
		||||
    while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
 | 
			
		||||
      i++;
 | 
			
		||||
    // Pull out the HREF= attribute, as that's an "external reference" we need to keep track of.
 | 
			
		||||
    RE m = new RE(m_href,RE.MATCH_CASEINDEPENDENT);
 | 
			
		||||
    if (m.match(contents))
 | 
			
		||||
    { // get the attribute value
 | 
			
		||||
      try
 | 
			
		||||
      { // turn it into a URL and add it as an external reference
 | 
			
		||||
	String s = extractAttribute(contents.substring(m.getParenEnd(0)));
 | 
			
		||||
	if (!(StringUtil.isStringEmpty(s)) && s.toLowerCase().startsWith("http:"))
 | 
			
		||||
	{ // turn it into the URL and add it
 | 
			
		||||
	  URL ref = new URL(s);
 | 
			
		||||
	  context.addExternalReference(ref);
 | 
			
		||||
 | 
			
		||||
    // Search for the TARGET= attribute in the tag data.
 | 
			
		||||
	} // end if
 | 
			
		||||
	// else ignore me
 | 
			
		||||
 | 
			
		||||
      } // end try
 | 
			
		||||
      catch (MalformedURLException e)
 | 
			
		||||
      { // ignore this reference
 | 
			
		||||
      } // end catch
 | 
			
		||||
 | 
			
		||||
    } // end if
 | 
			
		||||
 | 
			
		||||
    // Look for the TARGET= attribute.
 | 
			
		||||
    boolean target_seen = false;
 | 
			
		||||
    while (i!=contents.length())
 | 
			
		||||
    { // skip over any whitespace between one attribute (or the name) and the next one
 | 
			
		||||
      while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
 | 
			
		||||
	i++;
 | 
			
		||||
      if (i==contents.length())
 | 
			
		||||
	break;  // reached end of string, all done searching
 | 
			
		||||
 | 
			
		||||
      // Mark the start of this attribute name and start skipping over it.
 | 
			
		||||
      int start_name = i;
 | 
			
		||||
      while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i)))
 | 
			
		||||
	     && (contents.charAt(i)!='='))
 | 
			
		||||
	i++;
 | 
			
		||||
 | 
			
		||||
      // We now know where the attribute name is, see if it's "TARGET".
 | 
			
		||||
      if ((i-start_name)==TARGET_ATTR.length())
 | 
			
		||||
      { // compare the substring to see if it's right
 | 
			
		||||
	String name = contents.substring(start_name,i);
 | 
			
		||||
	if (name.equalsIgnoreCase(TARGET_ATTR))
 | 
			
		||||
	{ // OK, we saw the TARGET tag in the list!  Bail out!
 | 
			
		||||
    m = new RE(m_target,RE.MATCH_CASEINDEPENDENT);
 | 
			
		||||
    if (m.match(contents))
 | 
			
		||||
    { // get the attribute value
 | 
			
		||||
      String s = extractAttribute(contents.substring(m.getParenEnd(0)));
 | 
			
		||||
      if (!(StringUtil.isStringEmpty(s)))
 | 
			
		||||
	target_seen = true;
 | 
			
		||||
	  break;
 | 
			
		||||
 | 
			
		||||
    } // end if
 | 
			
		||||
 | 
			
		||||
      } // end if
 | 
			
		||||
 | 
			
		||||
      while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
 | 
			
		||||
	i++;  // skip over whitespace at end of name but before the = sign
 | 
			
		||||
 | 
			
		||||
      if ((i<contents.length()) && (contents.charAt(i)=='='))
 | 
			
		||||
      { // skip over the = sign first
 | 
			
		||||
	i++;
 | 
			
		||||
	while ((i!=contents.length()) && Character.isWhitespace(contents.charAt(i)))
 | 
			
		||||
	  i++;  // skip over whitespace after the = sign
 | 
			
		||||
 | 
			
		||||
	if (i==contents.length())
 | 
			
		||||
	  break;  // reached end of string, all done searching
 | 
			
		||||
 | 
			
		||||
	if ((contents.charAt(i)=='\'') || (contents.charAt(i)=='\"'))
 | 
			
		||||
	{ // this is a quoted string - swallow it
 | 
			
		||||
	  char quote_char = contents.charAt(i++); // skip the quote part
 | 
			
		||||
	  while ((i!=contents.length()) && (contents.charAt(i)!=quote_char))
 | 
			
		||||
	    i++;  // skip over data between quotes
 | 
			
		||||
	  if (i!=contents.length())
 | 
			
		||||
	    i++;  // skip over last quote
 | 
			
		||||
 | 
			
		||||
	} // end if
 | 
			
		||||
	else
 | 
			
		||||
	{ // skip over a single word
 | 
			
		||||
	  while ((i!=contents.length()) && !(Character.isWhitespace(contents.charAt(i))))
 | 
			
		||||
	    i++;
 | 
			
		||||
 | 
			
		||||
	} // end else
 | 
			
		||||
 | 
			
		||||
      } // end if
 | 
			
		||||
      // else this tag had no value - just go on to the next one
 | 
			
		||||
 | 
			
		||||
    } // end while
 | 
			
		||||
 | 
			
		||||
    if (target_seen)
 | 
			
		||||
      return contents;  // no need to alter the string
 | 
			
		||||
 | 
			
		||||
    String tail = (String)(context.getCheckerAttrValue("ANCHORTAIL"));
 | 
			
		||||
    return new String(contents + " " + tail);
 | 
			
		||||
    return contents + " " + tail;
 | 
			
		||||
 | 
			
		||||
  } // end rewriteTagContents
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user