HTML Parserラッパ

HTML Parser - HTML Parserのラッパを作り始めた。
http://journal.mycom.co.jp/news/2005/06/15/029.html

方針は以下のような感じ:

import org.htmlparser._
import org.htmlparser.util._
import org.htmlparser.filters._

//----------------------------------------------------
/** Small convenience layer over the org.htmlparser API.
  *
  * Import `HttpParser._` to bring the implicit conversions below into scope.
  * All result types are spelled out explicitly so that the implicit
  * conversions stay resolvable regardless of definition order and compiler
  * version.
  */
object HttpParser {
  /** Factory for CSS-selector based node filters. */
  object CSSSelector {
    /** Wraps `selector` in a `CssSelectorNodeFilter`. */
    def apply(selector: String): CssSelectorNodeFilter = new CssSelectorNodeFilter(selector)
  }

  /** Adds a `|` combinator to `NodeFilter`. */
  class MyNodeFilter(nodeFilter: NodeFilter) {
    /** Combines the wrapped filter and `q` into an `OrFilter`. */
    def |(q: NodeFilter): OrFilter = new OrFilter(nodeFilter, q)
  }
  implicit def NodeSelectorToExtNodeSelector(p: NodeFilter): MyNodeFilter = new MyNodeFilter(p)

  /** Adds callback-style iteration to `Parser`. */
  class MyParser(parser: Parser) {
    /** Parses with `selector` and calls `f` once for every node of the
      * resulting node list, in iteration order.
      *
      * @param selector filter handed to `parser.parse`
      * @param f        callback invoked for each returned node
      */
    def each(selector: NodeFilter)(f: Node => Unit): Unit = {
      val nodes = parser.parse(selector).elements
      while (nodes.hasMoreNodes) {
        f(nodes.nextNode)
      }
    }
  }
  implicit def ParserToExtParser(p: Parser): MyParser = new MyParser(p)

  /** Adds a shorthand accessor for the `class` attribute to `Tag`. */
  class MyTag(tag: Tag) {
    /** Value of the tag's `class` attribute, exactly as `getAttribute`
      * returns it.
      * NOTE(review): presumably null when the attribute is absent - confirm
      * against the HTML Parser javadoc before dereferencing the result.
      */
    def klass: String = tag.getAttribute("class")
  }
  implicit def TagToExtTag(t: Tag): MyTag = new MyTag(t)

  /** Adds `strip` to `String`. */
  class MyString(str: String) {
    /** Removes leading and trailing whitespace (regex `\s`) from the string. */
    def strip: String = str.replaceAll("(^\\s*|\\s*$)", "")
  }
  implicit def StringToExtString(s: String): MyString = new MyString(s)

}

//----------------------------------------------------
/** Downloads one archive page of the Lingr "scala-ja" room and prints the
  * handles, timestamps and messages found on it to standard output.
  *
  * NOTE(review): `Application` is kept because the target Scala version is
  * not visible from here; it is deprecated in later Scala releases in favour
  * of `App` / an explicit `main` method - confirm before upgrading.
  */
object lingr_log_getter extends Application {
  import HttpParser._

  val url = "http://www.lingr.com/room/scala-ja/archives/2008/02/24"
  val parser = new Parser(url)

  // One selector per kind of element of interest, OR-ed into a single filter
  // so the page is traversed only once.
  val handleSelector    = CSSSelector("#messages .handleText")
  val msgSelector       = CSSSelector("#messages .messageTextContainer")
  val timestampSelector = CSSSelector("#messages .timestamp")
  val selector = (handleSelector | msgSelector | timestampSelector)

  parser.each(selector){node =>
    // Stripped text of the first child, or "" when the node has no children.
    val firstChild = node.getFirstChild
    val str = if (firstChild != null) firstChild.getText.strip else ""
    node match {
      case tag:Tag if tag.klass == "handleText"           => println("[" + str + "]")
      case tag:Tag if tag.klass == "timestamp"            => if (str != "") println("\n<" + str + ">")
      case tag:Tag if tag.klass == "messageTextContainer" => {
        // The child list is null-checked just like getFirstChild above, so an
        // empty container falls through to the single-line branch instead of
        // throwing a NullPointerException.
        val children = tag.getChildren
        if (children != null && children.size > 1) {
          println("-----")
          println(children.asString.strip)
          println("-----")
        } else
          println("  " + str)
      }
      // A selected node is not guaranteed to satisfy any guard above (e.g. a
      // non-Tag node, or a class attribute that is not exactly one of the
      // three names); skip it rather than abort the run with a MatchError.
      case _ => ()
    }
  }
}