Ruby/XML/rexml

Материал из Wiki.crossplatform.ru

Перейти к: навигация, поиск

Содержание

A basic demonstration of parsing an XML file looking for certain elements:

require "rexml/document"
# Sample document: a <people> root holding two <person> records.
xml = <<END_XML
<people>
  <person>
    <name>P</name>
    <gender>Male</gender>
  </person>
  <person>
    <name>F</name>
    <gender>Male</gender>
  </person>
</people>
END_XML

# Build the in-memory tree, then print the first <name> child of every
# <person> element selected by the XPath expression.
tree = REXML::Document.new(xml)
tree.get_elements("people/person").each do |person|
  puts person.elements["name"]
end



Add new element

require "rexml/document"
doc = REXML::Document.new
meeting = doc.add_element "meeting"
meeting_start = Time.local(2006, 10, 31, 13)
meeting.add_element("time", { "from" => meeting_start,
                              "to" => meeting_start + 3600 })
 
agenda = meeting.add_element "agenda"
doc.children[1]
 
item1 = agenda.add_element "item"
puts doc.children[1]
item1.text = "new Test"
puts doc.children[1]
doc.write



Assign new value to node

require "delegate"
require "rexml/text"
require "rexml/document"
# IO wrapper that expands entity references in everything appended to it.
#
# Text pushed through #<< is first run through REXML::Text.unnormalize with
# the document's DOCTYPE, then forwarded to the wrapped stream.
class EntitySubstituter < DelegateClass(IO)
  # io       - the stream that receives the processed text.
  # document - object whose #doctype is handed to REXML::Text.unnormalize.
  # filter   - optional value passed through as unnormalize's filter argument.
  def initialize(io, document, filter=nil)
    @document = document
    @filter = filter
    super(io)
  end

  # Appends +s+ to the wrapped stream after entity expansion.
  #
  # Returns self rather than the wrapped stream, so every link of a chained
  # append (out << a << b) goes through the substitution as well.
  def <<(s)
    super(REXML::Text::unnormalize(s, @document.doctype, @filter))
    self
  end
end
 
# Document declaring two internal entities and referencing both in its text.
str = %{<?xml version="1.0"?>
<!DOCTYPE doc [
 <!ENTITY product "Stargaze">
 <!ENTITY version "2.3">
]>
<doc>
 &product; v&version; 
</doc>}

doc = REXML::Document.new(str)

# Overwrite the content of the root element's first child (its text node),
# then print the whole document to standard output.
text_node = doc.root.children.first
text_node.value = "&product; v&version;"
doc.write($stdout)



Checking XML Well-Formedness

require "rexml/document"
# Parses +xml+ with REXML and reports whether it is well-formed.
#
# xml - the XML source handed to REXML::Document.new.
#
# Returns the parsed REXML::Document when parsing succeeds and the document
# has a root element, nil otherwise. The result can therefore be used both
# as a truth value and as the document itself.
def valid_xml?(xml)
  doc = REXML::Document.new(xml)
  # REXML may accept input that contains no element at all (for example an
  # empty string) without raising; a well-formed document needs a root.
  doc if doc.root
rescue REXML::ParseException
  # Malformed input: report it as nil instead of propagating the error.
  nil
end
# Malformed sample: <pending> and <done> are opened but never closed.
bad_xml = %{
<tasks>
 <pending>
   <entry>G</entry>
 <done>
   <entry>D</entry>
</tasks>}

# Well-formed sample.
good_xml = %{
<groceries>
 <bread>W</bread>
 <bread>Q</bread>
</groceries>}

valid_xml?(bad_xml)                                     # => nil
doc = valid_xml?(good_xml)



Compressing Whitespace in an XML Document

require "rexml/document"
# Parse with whitespace compression requested for every element, then print
# the serialized document.
text = %{<doc><a>Some      whitespace</a>    <b>Some   more</b></doc>}
compressed = REXML::Document.new(text, :compress_whitespace => :all)
puts compressed.to_s



Compress new line character

require "rexml/document"
 
# The input must be a complete document: the closing </doc> tag shown in the
# expected result below has to be present in the source text, otherwise the
# parser is given an unterminated root element.
text = %{<doc><a>Some   text</a>\n  <b>Some   more</b>\n\n</doc>}
REXML::Document.new(text, { :compress_whitespace => :all }).to_s
# => "<doc><a>Some text</a>\n <b>Some more</b>\n</doc>"



Compress whitespace only in the <a> element

require "rexml/document"
# Only elements named in the :compress_whitespace list (here <a>) have their
# whitespace runs collapsed; the expected result below shows the rest of the
# text unchanged.
text = %{<doc><a>Some      whitespace</a>    <b>Some   more</b></doc>}
only_in_a = { :compress_whitespace => ["a"] }
REXML::Document.new(text, only_in_a).to_s
# => "<doc><a>Some whitespace</a>    <b>Some   more</b></doc>"



Creating and Modifying XML Documents

require "rexml/document"
# Build a <meeting> document, inspect its children, then add an XML
# declaration and inspect the children again.
doc = REXML::Document.new
meeting = doc.add_element "meeting"
meeting_start = Time.local(2006, 10, 31, 13)
meeting.add_element("time", { "from" => meeting_start,
                              "to" => meeting_start + 3600 })
doc.children[0]                                # => <meeting> ... </>
puts doc.children[0].children[0]               # the <time .../> element

# Adding an XML declaration puts it ahead of the root element; that is what
# moves <meeting> from index 0 to index 1 in the two lines below. Without
# this step children[0] is still <meeting> and children[1] is nil.
doc << REXML::XMLDecl.new
puts doc.children[0]                                # => <?xml ... ?>
puts doc.children[1]                                # => <meeting> ... </>



Entity Substituter

require "delegate"
require "rexml/text"
require "rexml/document"
# IO wrapper that expands entity references in everything appended to it.
#
# Text pushed through #<< is first run through REXML::Text.unnormalize with
# the document's DOCTYPE, then forwarded to the wrapped stream.
class EntitySubstituter < DelegateClass(IO)
  # io       - the stream that receives the processed text.
  # document - object whose #doctype is handed to REXML::Text.unnormalize.
  # filter   - optional value passed through as unnormalize's filter argument.
  def initialize(io, document, filter=nil)
    @document = document
    @filter = filter
    super(io)
  end

  # Appends +s+ to the wrapped stream after entity expansion.
  #
  # Returns self rather than the wrapped stream, so every link of a chained
  # append (out << a << b) goes through the substitution as well.
  def <<(s)
    super(REXML::Text::unnormalize(s, @document.doctype, @filter))
    self
  end
end
 
# Document declaring two internal entities and referencing both in its text.
str = %{<?xml version="1.0"?>
<!DOCTYPE doc [
 <!ENTITY product "Stargaze">
 <!ENTITY version "2.3">
]>
<doc>
 &product; v&version; 
</doc>}
doc = REXML::Document.new(str)

# Write the document to standard output through the substituting wrapper.
output = EntitySubstituter.new($stdout, doc)
doc.write(output)



Extracting Data From a Document's Tree Structure

orders_xml = %{
<orders>
  <order>
    <number>1</number>
    <date>02/10/2008</date>
    <customer>C</customer>
    <items>
      <item upc="0" desc="Roses" qty="240" />
      <item upc="1" desc="Candy" qty="160" />
    </items>
  </order>
</orders>}
require "rexml/document"
orders = REXML::Document.new(orders_xml)

# Walk every <order> under the root. A child that itself contains elements
# (<items>) is listed entry by entry using each entry's "desc" attribute;
# any other child (<number>, <date>, ...) is printed with its text content.
orders.root.each_element do |order|
  order.each_element do |field|
    unless field.has_elements?
      puts "#{field.name}: #{field.text}"
      next
    end

    field.each_element { |entry| puts "#{entry.name}: #{entry.attributes["desc"]}" }
  end
end



Extracting Data While Parsing a Document

# Sample event log: an <events> root with one child element per event, each
# carrying "system" and "error" attributes among others.
event_xml = %{
<events>
  <clean system="dev" start="01:35" end="01:55" area="build" error="1" />
  <backup system="test" start="02:00" end="02:47" size="327450" error="0" />
</events>}
require "rexml/document"
require "rexml/streamlistener"
# Stream listener that reports every element whose "error" attribute is
# present and different from "0".
class ErrorListener
  include REXML::StreamListener

  # Called with the tag name and its attributes for each opening tag.
  # Prints one line for a failed event; prints nothing otherwise.
  def tag_start(name, attrs)
    code = attrs["error"]
    return if code.nil? || code == "0"

    puts %{Event "#{name}" failed for system "#{attrs["system"]}" with code #{code}}
  end
end
# Stream-parse the event log, handing each parse event to a fresh listener.
listener = ErrorListener.new
REXML::Document.parse_stream(event_xml, listener)



Get a node from an XML document

orders_xml = %{
<orders>
  <order>
    <number>1</number>
    <date>02/10/2008</date>
    <customer>C</customer>
    <items>
      <item upc="0" desc="Roses" qty="240" />
      <item upc="1" desc="Candy" qty="160" />
    </items>
  </order>
</orders>}
require "rexml/document"
orders = REXML::Document.new(orders_xml)

# Element indexes start at 1 here: elements[1] is the first child element.
root = orders.root
my_order = root.elements[1]
first_node = my_order.elements[1]

# From the first child, reach its own name, its next sibling and its parent.
first_node.name                                      # => "number"
first_node.next_element.name                         # => "date"
first_node.parent.name                               # => "order"



Get the first element under root

require "rexml/document"
# Parses +xml+ with REXML and reports whether it is well-formed.
#
# xml - the XML source handed to REXML::Document.new.
#
# Returns the parsed REXML::Document when parsing succeeds and the document
# has a root element, nil otherwise. The result can therefore be used both
# as a truth value and as the document itself.
def valid_xml?(xml)
  doc = REXML::Document.new(xml)
  # REXML may accept input that contains no element at all (for example an
  # empty string) without raising; a well-formed document needs a root.
  doc if doc.root
rescue REXML::ParseException
  # Malformed input: report it as nil instead of propagating the error.
  nil
end
good_xml = %{
<groceries>
 <bread>W</bread>
 <bread>Q</bread>
</groceries>}
doc = valid_xml?(good_xml)

# elements[1] selects the first child element of the root.
first_child = doc.root.elements[1]
p first_child                                            # => <bread> ... </>



Preserve whitespace only in the <a> element

require "rexml/document"
# Whitespace is kept as written only inside the elements listed under
# :respect_whitespace (here <a>); the expected result below shows it
# compressed everywhere else.
text = %{<doc><a>Some      whitespace</a>    <b>Some   more</b></doc>}
keep_in_a = { :respect_whitespace => ["a"] }
REXML::Document.new(text, keep_in_a).to_s
# => "<doc><a>Some      whitespace</a> <b>Some more</b></doc>"



Output xml with REXML

require "rexml/document"
# Parse a small document and write it straight back to standard output.
good_xml = %{
<groceries>
 <bread>W</bread>
 <bread>Q</bread>
</groceries>}
groceries = REXML::Document.new(good_xml)
groceries.write($stdout)



require "rexml/document"

# Compress whitespace inside text for every element and, via
# :ignore_whitespace_nodes, drop text nodes that contain only whitespace
# (the run of spaces between </a> and <b>).
text = %{<doc><a>Some      whitespace</a>    <b>Some   more</b></doc>}
REXML::Document.new(text, { :compress_whitespace => :all,
                        :ignore_whitespace_nodes => :all }).to_s
# => "<doc><a>Some whitespace</a><b>Some more</b></doc>"



REXML is already part of Ruby, so you get it free of charge.

#!/usr/bin/env ruby
require "rexml/document"
# Mixing REXML into the top level lets Document be referenced without the
# REXML:: prefix.
include REXML

# Sample address record. Every element needs a proper closing tag
# (<family>B</family>), otherwise the document cannot be parsed.
address = <<XML
<address>
 <name><given>A</given><family>B</family></name>
 <street>9876 St.</street>
 <city>CA</city>
 <state>Colorado</state>
 <code>81000</code>
 <country>USA</country>
</address>
XML

# Parse the heredoc and print the document back out.
document = Document.new( address )
puts document



Use REXML to read an XML file

File: matz.xml
<hello>Matz!</hello>
#!/usr/bin/env ruby
require "rexml/document"
# Read the sample file named in the listing above (matz.xml). The block form
# of File.open closes the handle once the block returns, i.e. after the
# document has been constructed from the file.
doc = File.open("matz.xml") { |file| REXML::Document.new(file) }
puts doc.to_s