# hpricot text_transform, updated for Hpricot 0.8.1
#
# This is an updated version of Henrik Nyh's text_transform! library. It lets you transform the text nodes of an Hpricot document in place while leaving the markup alone. The original broke with Hpricot 0.8.1 because some of Hpricot's internal variables were renamed; this version fixes that.


# By Henrik Nyh <http://henrik.nyh.se/> 2007-03-28.
# Based on http://vemod.net/code/hpricot_goodies/hpricot_text_gsub.rb.
# MODIFIED BY GARRY TAN ON 4/21 to support Hpricot 0.8.1
# Licensed under the same terms as Ruby.

require "rubygems"
require "hpricot"

module Posterous
  module Extensions
    # Mixins that give document nodes a recursive, in-place +text_transform!+.
    #
    # Every mixin defines the same entry point,
    # +text_transform!(options = {}, &block)+, so a caller can invoke it on any
    # node without checking what kind of node it is.
    module HpricotTextTransform
      # For nodes that expose +children+ (and, optionally, +name+).
      module NodeWithChildrenExtension
        # Forwards +text_transform!+ to every child of this node.
        #
        # options - Hash of options. +:except+ is a tag name, or an Array of
        #           tag names, whose subtrees are skipped. Names may be given
        #           as Symbols or Strings.
        # block   - passed on unchanged to each child.
        #
        # Returns nil when this node is excluded, otherwise the collection of
        # children that was walked.
        def text_transform!(options = {}, &block)
          # Not every including class defines +name+, hence the defined? probe.
          tag = name.to_s if defined?(name) && name
          return if tag && Array(options[:except]).any? { |skipped| skipped.to_s == tag }

          # +children+ may be nil instead of an empty list (presumably for
          # elements without content, e.g. <br /> -- confirm against Hpricot).
          (children || []).each { |child| child.text_transform!(options, &block) }
        end
      end

      # For leaf nodes that expose their text through +content+.
      module TextNodeExtension
        # Yields +content+ to the block and overwrites +content+ in place (via
        # +replace+) with whatever the block returns.
        #
        # options - accepted for interface parity; not used here.
        #
        # Returns the result of +content.replace+.
        def text_transform!(options = {}, &block)
          content.replace yield(content)
        end
      end

      # For node types that should be left untouched.
      module EmptyTransform
        # Does nothing; exists so the recursive walk can call +text_transform!+
        # on these nodes without a NoMethodError.
        def text_transform!(options = {}, &block)
        end
      end
    end
  end
end
# Nodes that can hold other nodes get the recursive implementation.
[Hpricot::Doc, Hpricot::Elem].each do |klass|
  klass.send(:include, Posterous::Extensions::HpricotTextTransform::NodeWithChildrenExtension)
end

# Text nodes are where the block is actually applied.
Hpricot::Text.send(:include, Posterous::Extensions::HpricotTextTransform::TextNodeExtension)

# Every other node type gets the do-nothing implementation.
[
  Hpricot::Comment,
  Hpricot::BogusETag,
  Hpricot::XMLDecl,
  Hpricot::ETag,
  Hpricot::ProcIns,
  Hpricot::DocType
].each do |klass|
  klass.send(:include, Posterous::Extensions::HpricotTextTransform::EmptyTransform)
end

# Self-test: runs only when this file is executed directly.
if __FILE__ == $0
  require "test/unit"

  # NOTE(review): the published copy of these fixtures had lost the <a>...</a>
  # wrapper around "hello" and had gained a stray "</http:>" closing tag, which
  # left the expectations mutually inconsistent (identical inputs expecting
  # both "hello" and "olleh"). The wrapper is restored here from what the
  # expectations imply; confirm against the original library if available.
  class HpricotTextTransformTest < Test::Unit::TestCase
    # Parses +input+ with Hpricot, runs +text_transform!+ with +options+ and
    # +block+, and asserts that the serialized document equals +expected+.
    def assert_hpricot_transform(expected, input, options={}, &block)
      doc = Hpricot(input)
      doc.text_transform!(options, &block)
      assert_equal(expected, doc.to_s)
    end

    def test_with_gsub
      input    = 'xxx'
      expected = 'yyy'
      assert_hpricot_transform(expected, input, {}) { |text| text.gsub("x", "y") }
    end

    # Each text node is reversed on its own; the markup stays in place.
    def test_with_reverse
      input    = '<a>hello</a> world from <code>ruby</code>'
      expected = '<a>olleh</a> morf dlrow <code>ybur</code>'
      assert_hpricot_transform(expected, input, {}) { |text| text.reverse }
    end

    def test_with_reverse_exclude_one_tag
      input    = '<a>hello</a> world from <code>ruby</code>'
      expected = '<a>olleh</a> morf dlrow <code>ruby</code>'
      assert_hpricot_transform(expected, input, {:except => :code}) { |text| text.reverse }
    end

    def test_with_reverse_exclude_multiple_tags
      input    = '<a>hello</a> world from <code>ruby</code>'
      expected = '<a>hello</a> morf dlrow <code>ruby</code>'
      assert_hpricot_transform(expected, input, {:except => [:a, :code]}) { |text| text.reverse }
    end

    # The excluded tag sits inside another element; its text must still be skipped.
    def test_with_reverse_exclude_nested_tag
      input    = "<a>hello</a> world from <pre><code>ruby</code></pre>\n\n"
      expected = "<a>olleh</a> morf dlrow <pre><code>ruby</code></pre>\n\n"
      assert_hpricot_transform(expected, input, {:except => :code}) { |text| text.reverse }
    end

  end
end
views