[ Back | Previous | Next ]

How to filter hrefs from an HTML file?

Package:
javax.swing.text.*
Product:
JDK
Release:
1.2
Related Links:
General
JEditorPane
PlainDocument
Comment:
**
 * Insert the method's description here.
 * Creation date: (11/23/00 10:39:44 PM)
 */
public void read() throws Exception {
 Reader in = new FileReader("index.html");
 //HTMLReader.MyParser p = new HTMLReader.MyParser(DTD.getDTD("HTML"));
 //p.parse(in);
 //HTMLEditorkit contains HTML parser. Creates structured Document
object that
 //models HTML: 'text storage model'
 HTMLEditorKit editorKit = new HTMLEditorKit();
 HTMLDocument HTMLDoc = (HTMLDocument)
editorKit.createDefaultDocument();

 // The Document class does not yet handle character sets properly
 HTMLDoc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
 try {

  //Create a reader on the HTML content of inputted page
  Reader HTMLReader = in;

  //Parse the HTML using EditorKit's read() method. Inserts content from
input
  //stream (above) into HTMLDoc
  editorKit.read(HTMLReader, HTMLDoc, 0);

  //Iterate through the elements of the HTML doc
  ElementIterator iterator = new ElementIterator(HTMLDoc);

  //Interface to describe structural piece of doc (token)
  javax.swing.text.Element elem;
  while ((elem = iterator.next()) != null) {
   SimpleAttributeSet sas = (SimpleAttributeSet)

   //StyleContext.NameAttribute attribute
   elem.getAttributes().getAttribute(HTML.Tag.A);
   if (sas != null) {
    System.out.println(sas.getAttribute(HTML.Attribute.HREF));
   }
  }
 } catch (Exception e) {
  e.printStackTrace();
 }
 System.exit(1);
}
1