Ruby/XML/rexml
Материал из Wiki.crossplatform.ru
A basic demonstration of parsing an XML file looking for certain elements:
require "rexml/document"

# Sample document: two <person> records wrapped in a <people> root.
xml = <<END_XML
<people>
  <person>
    <name>P</name>
    <gender>Male</gender>
  </person>
  <person>
    <name>F</name>
    <gender>Male</gender>
  </person>
</people>
END_XML

tree = REXML::Document.new(xml)

# Select every <person> under <people> with an XPath expression and print
# the first <name> child of each one (the element itself, not just its text).
tree.get_elements("people/person").each do |person|
  puts person.elements["name"]
end
Add new element
require "rexml/document" doc = REXML::Document.new meeting = doc.add_element "meeting" meeting_start = Time.local(2006, 10, 31, 13) meeting.add_element("time", { "from" => meeting_start, "to" => meeting_start + 3600 }) agenda = meeting.add_element "agenda" doc.children[1] item1 = agenda.add_element "item" puts doc.children[1] item1.text = "new Test" puts doc.children[1] doc.write
Assign new value to node
require "delegate"
require "rexml/text"
require "rexml/document"

# IO wrapper that expands entity references, using the entities declared in
# the given document's DOCTYPE, in everything appended with <<.
class EntitySubstituter < DelegateClass(IO)
  # io       - the stream that receives the expanded text
  # document - document whose doctype supplies the entity definitions
  # filter   - optional filter handed through to REXML::Text.unnormalize
  def initialize(io, document, filter = nil)
    super(io)
    @document = document
    @filter = filter
  end

  # Expand entity references in +s+, then forward the result to the wrapped stream.
  def <<(s)
    expanded = REXML::Text.unnormalize(s, @document.doctype, @filter)
    super(expanded)
  end
end

str = %{<?xml version="1.0"?>
<!DOCTYPE doc [
  <!ENTITY product "Stargaze">
  <!ENTITY version "2.3">
]>
<doc>
  &product; v&version;
</doc>}

doc = REXML::Document.new(str)

# The first child of the root element is its text node; give it a new value
# and write the document to standard output.
text_node = doc.root.children.first
text_node.value = "&product; v&version;"
doc.write
Checking XML Well-Formedness
require "rexml/document"

# Parse +xml+ with REXML.
# Returns the parsed REXML::Document, or nil when REXML raises a
# ParseException while reading the input.
def valid_xml?(xml)
  REXML::Document.new(xml)
rescue REXML::ParseException
  nil
end

# <pending> and <done> are never closed, so this input is not well-formed.
bad_xml = %{
<tasks>
  <pending>
    <entry>G</entry>
  <done>
    <entry>D</entry>
</tasks>}

valid_xml?(bad_xml) # => nil

good_xml = %{
<groceries>
  <bread>W</bread>
  <bread>Q</bread>
</groceries>}

doc = valid_xml?(good_xml)
Compressing Whitespace in an XML Document
require "rexml/document"

text = %{<doc><a>Some whitespace</a> <b>Some more</b></doc>}

# Parse with whitespace compression switched on for every element, then
# print the serialized document.
compressed = REXML::Document.new(text, { compress_whitespace: :all })
puts compressed.to_s
Compress newline characters
require "rexml/document"

# The sample must end with </doc>: the expected output below shows the closing
# tag, and an unclosed root element is not well-formed XML.
text = %{<doc><a>Some text</a>\n <b>Some more</b>\n\n</doc>}

# Parse with whitespace compression enabled for every element and keep the
# serialized result so it can be inspected.
compressed = REXML::Document.new(text, { :compress_whitespace => :all }).to_s
# => "<doc><a>Some text</a>\n <b>Some more</b>\n</doc>"
Compress whitespace only in anchor
require "rexml/document"

text = %{<doc><a>Some whitespace</a> <b>Some more</b></doc>}

# Request whitespace compression only for the elements named in the list
# (here just <a>) and serialize the document.
doc = REXML::Document.new(text, { compress_whitespace: %w[a] })
doc.to_s
# => "<doc><a>Some whitespace</a> <b>Some more</b></doc>"
Creating and Modifying XML Documents
require "rexml/document"

# Create an empty document and grow it element by element.
doc = REXML::Document.new
meeting = doc.add_element "meeting"
meeting_start = Time.local(2006, 10, 31, 13)
meeting.add_element("time", { "from" => meeting_start, "to" => meeting_start + 3600 })

# Before a declaration is added, <meeting> is the document's first child.
doc.children[0] # => <meeting> ... </>
puts doc.children[0].children[0]

# Add an XML declaration. Without this step the two lines below would print
# <meeting> and a blank line, contradicting their comments.
doc << REXML::XMLDecl.new

puts doc.children[0] # => <?xml ... ?>
puts doc.children[1] # => <meeting> ... </>
Entity Substituter
require "delegate"
require "rexml/text"
require "rexml/document"

# IO wrapper that expands entity references, using the entities declared in
# the given document's DOCTYPE, in everything appended with <<.
class EntitySubstituter < DelegateClass(IO)
  # io       - the stream that receives the expanded text
  # document - document whose doctype supplies the entity definitions
  # filter   - optional filter handed through to REXML::Text.unnormalize
  def initialize(io, document, filter = nil)
    super(io)
    @document = document
    @filter = filter
  end

  # Expand entity references in +s+, then forward the result to the wrapped stream.
  def <<(s)
    expanded = REXML::Text.unnormalize(s, @document.doctype, @filter)
    super(expanded)
  end
end

str = %{<?xml version="1.0"?>
<!DOCTYPE doc [
  <!ENTITY product "Stargaze">
  <!ENTITY version "2.3">
]>
<doc>
  &product; v&version;
</doc>}

doc = REXML::Document.new(str)

# Write the document through the substituting wrapper around standard output.
output = EntitySubstituter.new($stdout, doc)
doc.write(output)
Extracting Data From a Document's Tree Structure
orders_xml = %{
<orders>
  <order>
    <number>1</number>
    <date>02/10/2008</date>
    <customer>C</customer>
    <items>
      <item upc="0" desc="Roses" qty="240" />
      <item upc="1" desc="Candy" qty="160" />
    </items>
  </order>
</orders>}

require "rexml/document"

orders = REXML::Document.new(orders_xml)

# Walk each <order> under the root. Children with no nested elements
# (<number>, <date>, ...) are printed with their text; children that contain
# elements (<items>) have each nested element printed with its "desc" attribute.
orders.root.each_element do |order|
  order.each_element do |node|
    unless node.has_elements?
      puts "#{node.name}: #{node.text}"
      next
    end

    node.each_element do |child|
      puts "#{child.name}: #{child.attributes["desc"]}"
    end
  end
end
Extracting Data While Parsing a Document
event_xml = %{
<events>
  <clean system="dev" start="01:35" end="01:55" area="build" error="1" />
  <backup system="test" start="02:00" end="02:47" size="327450" error="0" />
</events>}

require "rexml/document"
require "rexml/streamlistener"

# Stream listener that reports every start tag carrying an "error" attribute
# whose value is anything other than "0".
class ErrorListener
  include REXML::StreamListener

  # name  - tag name of the element being opened
  # attrs - the element's attributes, looked up by name
  def tag_start(name, attrs)
    code = attrs["error"]
    return if code.nil? || code == "0"

    puts %{Event "#{name}" failed for system "#{attrs["system"]}" with code #{code}}
  end
end

# Feed the document through the listener as it is parsed.
REXML::Document.parse_stream(event_xml, ErrorListener.new)
Get a node from an XML document
orders_xml = %{
<orders>
  <order>
    <number>1</number>
    <date>02/10/2008</date>
    <customer>C</customer>
    <items>
      <item upc="0" desc="Roses" qty="240" />
      <item upc="1" desc="Candy" qty="160" />
    </items>
  </order>
</orders>}

require "rexml/document"

orders = REXML::Document.new(orders_xml)

# Element collections are indexed from 1: take the first <order>, then its
# first child element.
my_order = orders.root.elements[1]
first_node = my_order.elements[1]

# Navigate relative to that node: itself, its next sibling element, its parent.
first_node.name              # => "number"
first_node.next_element.name # => "date"
first_node.parent.name       # => "order"
Get the first element under root
require "rexml/document"

# Parse +xml+ with REXML.
# Returns the parsed REXML::Document, or nil when REXML raises a
# ParseException while reading the input.
def valid_xml?(xml)
  REXML::Document.new(xml)
rescue REXML::ParseException
  nil
end

good_xml = %{
<groceries>
  <bread>W</bread>
  <bread>Q</bread>
</groceries>}

doc = valid_xml?(good_xml)

# Element collections are indexed from 1, so this inspects the first <bread>.
p doc.root.elements[1] # => <bread> ... </>
Leave whitespace for anchor
require "rexml/document"

text = %{<doc><a>Some whitespace</a> <b>Some more</b></doc>}

# Ask REXML to respect whitespace only for the elements named in the list
# (here just <a>) and serialize the document.
doc = REXML::Document.new(text, { respect_whitespace: %w[a] })
doc.to_s
# => "<doc><a>Some whitespace</a> <b>Some more</b></doc>"
Output xml with REXML
require "rexml/document"

good_xml = %{
<groceries>
  <bread>W</bread>
  <bread>Q</bread>
</groceries>}

# Parse the markup, then serialize it; called without arguments, write is
# used here to send the document to standard output.
document = REXML::Document.new(good_xml)
document.write
require "rexml/document"

text = %{<doc><a>Some whitespace</a> <b>Some more</b></doc>}

# Compress whitespace inside text and also drop text nodes that consist only
# of whitespace, such as the single space between </a> and <b>.
result = REXML::Document.new(text, { :compress_whitespace => :all,
                                     :ignore_whitespace_nodes => :all }).to_s
# => "<doc><a>Some whitespace</a><b>Some more</b></doc>"
REXML ships with Ruby (as a bundled gem since Ruby 3.0), so you get it free of charge.
#!/usr/bin/env ruby
require "rexml/document"
include REXML # lets the script refer to Document without the REXML:: prefix

# Sample address record. The <family> element needs a proper closing tag
# (</family>); the earlier "B/family>" left the element unclosed, which made
# the whole document malformed.
address = <<XML
<address>
  <name><given>A</given><family>B</family></name>
  <street>9876 St.</street>
  <city>CA</city>
  <state>Colorado</state>
  <code>81000</code>
  <country>USA</country>
</address>
XML

document = Document.new(address)
puts document
Use REXML to read an XML file
#!/usr/bin/env ruby
# Expects a file named matz.xml in the current directory containing:
#
#   <hello>Matz!</hello>
require "rexml/document"

# Open the same file the sample describes (matz.xml, not m.xml). The block
# form of File.open closes the handle once the document has been built.
doc = File.open("matz.xml") { |file| REXML::Document.new(file) }
puts doc.to_s