import java.util.*; public class HTMLEncoder { /** * The default constructor */ public HTMLEncoder() { } /* public static void main( String[] args ){ HTMLEncoder e = new HTMLEncoder(); try{ System.out.println( e.encode("This < > is a <b>test <u> test</B> test </u>")); }catch (Exception ex){ ex.printStackTrace(); } } */ /** * This method ensures that the tags in the html string are balanced and that any * tags outside a very small subset (bold, italic and underline) are rendered harmless by * substituting < and > for the brackets around these "invalid" tags. * * @param text the html string to be checked * @throws exception if the tags are not balanced */ public String encode( String text ) throws Exception{ Vector list = new Vector(); StringBuffer result; int index = 0; int oldIndex = 0; Tag t; String sub; //go and find the locations of all bold, underline and italic tags addAllTags( text, "b", list ); addAllTags( text, "i", list ); addAllTags( text, "u", list ); //sort the list (by location in the original text) Collections.sort(list); if (!isBalanced(list)) throw new Exception( "Unbalanced Tags" ); //convert the stray '<' amd '>' characters to < and > result = new StringBuffer(); Iterator i = list.iterator(); while (i.hasNext()){ t = (Tag)i.next(); index = t.getLocation(); sub = text.substring(oldIndex, index); sub = replace( sub, "<", "<" ); sub = replace( sub, ">", ">" ); result.append(sub); result.append(t.getTag()); oldIndex = index + t.getTag().length(); } //add the text after the last tag sub = text.substring(oldIndex); sub = replace( sub, "<", "<" ); sub = replace( sub, ">", ">" ); result.append(sub); return result.toString(); } /** * a simple utility function to replace all occurrences of one string with another * * @param source the string where the replacing will take place * @param oldString the string to search for in source * @param newString the string that is to replace oldString within source * @return the new string */ private String replace( String source, String oldString, String newString ){ int length = oldString.length(); int oldIndex = 0; int index = source.indexOf( oldString ); StringBuffer result = new StringBuffer(); while (index>=0){ result.append( source.substring(oldIndex, index) ); result.append( newString ); oldIndex = index + length; index = source.indexOf( oldString, index + 1); } result.append( source.substring(oldIndex)); return result.toString(); } /** * Given a list of tags, make sure there is the proper number of * closing and opening tags */ private boolean isBalanced( List list ){ //each open tag has a value of 1 //each close tag has a value of -1 //iterate through the tags and keep a total of the open and // close tags of each type HashMap table = new HashMap(); Iterator i = list.iterator(); int value; Tag t; Integer totalValue; while( i.hasNext()){ //get the tag t = (Tag)i.next(); //assign it a value value = t.isCloseTag()?-1:1; //the hash table stores the totals by tag name if (table.containsKey(t.getTagContent())){ totalValue = (Integer)table.get(t.getTagContent()); totalValue = new Integer( totalValue.intValue() + value ); } else { totalValue = new Integer( value ); } table.put(t.getTagContent(), totalValue); } //go through the hashtable and make sure all values are zero i = table.values().iterator(); while (i.hasNext()){ if ( ((Integer)i.next()).intValue() != 0 ) return false; } return true; } /** * Search within a string for all opening and closing tags of a * particular type and add their locations to a List collection * * @param text the source string * @param tag the tag to be looking for ("b" will be converted to <b> </b> <B> and </B>) * @param list the list to add the tags to */ private void addAllTags( String text, String tag, List list ){ //create the open and close tags String openTag = "<" + tag + ">"; String closeTag = "</" + tag + ">"; //add them in both upper and lower case formats addTags( text, openTag.toLowerCase(), list ); addTags( text, openTag.toUpperCase(), list ); addTags( text, closeTag.toLowerCase(), list ); addTags( text, closeTag.toUpperCase(), list ); } /** * find all occurences of a specific tag within a string and add them to a list * * @param text the html source string * @param tag the tag (e.g., <b>) * @param list the list to add the tags to */ private void addTags( String text, String tag, List list ){ int index = text.indexOf(tag,0); while (index >= 0){ list.add( new Tag( tag.toUpperCase(), index )); index = text.indexOf( tag, index+1 ); } } /** * Am inner class used to represent information about an HTML tag */ class Tag implements Comparable{ private String type; private int location; private String tag; public String getTagContent(){ if (this.isCloseTag()){ return tag.substring(2, tag.length()-1); } else { return tag.substring(1, tag.length()-1); } } public boolean isCloseTag(){ return (tag.charAt(1)=='/'); } public int getLocation(){ return location; } public String getTag(){ return tag; } Tag( String tag, int loc ){ this.tag = tag; this.location = loc; } //the method that implements the Comparable interface public int compareTo( Object o ){ return compareTo( (Tag)o ); } public int compareTo( Tag anotherTag ){ return this.location - anotherTag.getLocation(); } public String toString(){ return location + ": " + tag + " " + this.getTagContent(); } } }