# An updated version of Henrik Nyh's text_transform! library for Hpricot.
# It lets you extract and transform text nodes with ease. The original broke
# with internal changes in Hpricot 0.8.1 (some internal variables changed
# names); this version fixes that.
# By Henrik Nyh <http://henrik.nyh.se> 2007-03-28.
# Based on http://vemod.net/code/hpricot_goodies/hpricot_text_gsub.rb.
# MODIFIED BY GARRY TAN ON 4/21 to support Hpricot 0.8.1
# Licensed under the same terms as Ruby.
require "rubygems"
require "hpricot"
module Posterous
  module Extensions
    # Mixins that give node classes a +text_transform!+ method. Calling it on
    # a container walks down the tree and rewrites the content of every text
    # node in place with the value returned by the supplied block.
    module HpricotTextTransform
      # For nodes that respond to +children+ (and optionally to +name+).
      module NodeWithChildrenExtension
        # Calls +text_transform!+ on every child, passing +options+ and the
        # block along unchanged.
        #
        # options - Hash. The only key read here is +:except+: a single tag
        #           name or a list of tag names, each given as a Symbol or a
        #           String. A node whose +name+ matches one of them is
        #           skipped together with everything below it.
        # block   - receives the content of a text node and returns the
        #           replacement text.
        #
        # Returns nil when this node is excluded, otherwise the list of
        # children that was visited (empty when +children+ is nil).
        def text_transform!(options={}, &block)
          # Nodes that have no +name+ method can never be excluded.
          tag_name = name if defined?(name)
          if tag_name
            # Compare as strings so that Symbol and String tag names in
            # :except are treated alike.
            excluded = Array(options[:except]).map { |tag| tag.to_s }
            return if excluded.include?(tag_name.to_s)
          end

          # Array() turns a nil +children+ into an empty list instead of
          # raising NoMethodError.
          # NOTE(review): Hpricot appears to report nil children for empty
          # elements such as <br /> -- confirm against the Hpricot version in use.
          Array(children).each { |child| child.text_transform!(options, &block) }
        end
      end

      # For nodes that expose their text through +content+.
      module TextNodeExtension
        # Yields +content+ to the block and replaces +content+ in place with
        # the block's return value. +options+ is accepted only so that every
        # node type shares one signature; it is not read.
        def text_transform!(options={}, &block)
          content.replace yield(content)
        end
      end

      # For nodes that carry no transformable text: the call is accepted and
      # nothing happens.
      module EmptyTransform
        # No-op; always returns nil.
        def text_transform!(options={}, &block)
        end
      end
    end
  end
end
# Mix the text_transform! implementations into Hpricot's node classes:
# Doc and Elem get the version that descends into children, Text gets the
# version that rewrites its content, and every other node type listed below
# gets the no-op.
transform_mixins = Posterous::Extensions::HpricotTextTransform
[
  [transform_mixins::NodeWithChildrenExtension, [Hpricot::Doc, Hpricot::Elem]],
  [transform_mixins::TextNodeExtension, [Hpricot::Text]],
  [transform_mixins::EmptyTransform, [
    Hpricot::Comment,
    Hpricot::BogusETag,
    Hpricot::XMLDecl,
    Hpricot::ETag,
    Hpricot::ProcIns,
    Hpricot::DocType
  ]]
].each do |mixin, node_classes|
  # include is called through send, exactly as the original statements did.
  node_classes.each { |node_class| node_class.send(:include, mixin) }
end
# Self-test: runs only when this file is executed directly.
if __FILE__ == $0
  require "test/unit"

  class HpricotTextTransformTest < Test::Unit::TestCase
    # Parses +input+ with Hpricot, runs text_transform! on the document with
    # +options+ and +block+, and asserts that the serialized document equals
    # +expected+.
    def assert_hpricot_transform(expected, input, options={}, &block)
      doc = Hpricot(input)
      doc.text_transform!(options, &block)
      assert_equal(expected, doc.to_s)
    end

    # NOTE(review): the <a> element around "hello" in the fixtures below was
    # reconstructed. The previous inputs had lost it, which made the expected
    # values unreachable: reversing the single text node "hello world from "
    # gives " morf dlrow olleh", and the :a exclusion had nothing to match.
    # Confirm the markup against the upstream test suite.

    def test_with_gsub
      input = 'xxx'
      expected = 'yyy'
      assert_hpricot_transform(expected, input, {}) { |text| text.gsub("x", "y") }
    end

    # Each text node is reversed on its own; the markup stays in place.
    def test_with_reverse
      input = '<a>hello</a> world from <code>ruby</code>'
      expected = '<a>olleh</a> morf dlrow <code>ybur</code>'
      assert_hpricot_transform(expected, input, {}) { |text| text.reverse }
    end

    def test_with_reverse_exclude_one_tag
      input = '<a>hello</a> world from <code>ruby</code>'
      expected = '<a>olleh</a> morf dlrow <code>ruby</code>'
      assert_hpricot_transform(expected, input, {:except => :code}) { |text| text.reverse }
    end

    def test_with_reverse_exclude_multiple_tags
      input = '<a>hello</a> world from <code>ruby</code>'
      expected = '<a>hello</a> morf dlrow <code>ruby</code>'
      assert_hpricot_transform(expected, input, {:except => [:a, :code]}) { |text| text.reverse }
    end

    # <pre> itself is not excluded, but the <code> nested inside it is.
    def test_with_reverse_exclude_nested_tag
      input = '<a>hello</a> world from <pre><code>ruby</code></pre>'
      expected = '<a>olleh</a> morf dlrow <pre><code>ruby</code></pre>'
      assert_hpricot_transform(expected, input, {:except => :code}) { |text| text.reverse }
    end
  end
end