require 'spongiae/util/xml_callbacks'
require 'spongiae/unit'
require 'spongiae/tags'

require 'nokogiri'
require 'set'

module Spongiae
   module Formats
       
       # SAX handler for HTML input. While elements are opened and closed it
       # maintains @path, a '/'-separated locator of the current element in
       # which every segment is either "name[id=<id attribute>]" or
       # "name[<n>]" (n being the first counter not yet used for that path).
       # Paragraph detection itself is delegated to check_para_start /
       # check_para_end / pop_para_end_tag, which are not defined here
       # (presumably supplied by the InputCallback mixin -- confirm).
       class HtmlCallbacks < Nokogiri::XML::SAX::Document

           # Tag names that open a paragraph. The alternation is grouped and
           # anchored on both sides so that only the exact names match (and not,
           # for instance, 'pre', 'link' or 'subtitle').
           PARA_TAG_PATTERN = /\A(?:p|title|li|h\d)\z/

           # Structural tags that never contribute a segment to @path. Grouped
           # and anchored so that 'thead', 'header' or 'tbody' are not skipped.
           SKIPPED_TAG_PATTERN = /\A(?:html|head|body)\z/

           # sub - callable stored in @sub; this class never invokes it directly
           #       (presumably the mixin does -- confirm).
           def initialize(sub)
               @sub = sub
               @path = ''; @text = ''; @inPara = false
               @keys = Set.new
               @tags = []; @tagStack = []
           end

           # Converts a SAX attribute list into a { name => value } Hash.
           # Accepts [name, value] pairs as well as Nokogiri's namespace-aware
           # attribute structs (localname/prefix/uri/value); nil yields {}.
           def to_hash(attrs)
               res = {}
               (attrs || []).each do |row|
                   if row.respond_to?(:localname) && row.respond_to?(:value)
                       # Struct index 1 would be the prefix, not the value.
                       res[row.localname] = row.value
                   else
                       res[row[0]] = row[1]
                   end
               end
               res
           end

           include Spongiae::Util::XmlCallbacks::InputCallback

           # Truthy (0) when +name+ is exactly p, title, li or h<digit>; nil
           # otherwise. +attrs+ is accepted but not consulted.
           def is_para_start?(name, attrs = nil)
               name =~ PARA_TAG_PATTERN
           end

           # Role string for an inline formatting tag: 'bold' for b, 'italic'
           # for i, nil for anything else. +attrs+ is accepted but not consulted.
           def role_for_tag(name, attrs = nil)
               case name
               when 'b' then 'bold'
               when 'i' then 'italic'
               end
           end

           # Element-open event: appends a segment for +name+ to @path, then
           # calls check_para_start. html/head/body are ignored entirely.
           # +prefix+, +uri+ and +ns+ are accepted but unused.
           def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
               return if name =~ SKIPPED_TAG_PATTERN
               @path = "#{@path}/#{name}"
               attrs = to_hash(attrs)
               id = attrs['id']
               if id && !id.empty?
                   @path = "#{@path}[id=#{id}]"
               else
                   # No usable id: take the first index not yet recorded for this path.
                   i = 0
                   i += 1 while @keys.include?("#{@path}[#{i}]")
                   @keys.add("#{@path}[#{i}]")
                   @path = "#{@path}[#{i}]"
               end
               check_para_start(name, attrs)
           end

           # Non-namespace SAX entry point; forwards to start_element_namespace.
           def start_element(name, attrs = [])
               start_element_namespace(name, attrs)
           end

           # Element-close event: runs the paragraph-end hooks, then drops the
           # last segment of @path. +prefix+ and +uri+ are accepted but unused.
           def end_element_namespace(name, prefix = nil, uri = nil)
               check_para_end(name, true)
               pop_para_end_tag(name)
               # Tags skipped on open never added a segment, so none is removed.
               return if name =~ SKIPPED_TAG_PATTERN
               # NOTE: the segment is cut at the last '/', so an id value that
               # itself contains '/' would leave a partial segment behind.
               cut = @path.rindex('/')
               @path = @path[0, cut] if cut
           end

           # Non-namespace SAX entry point; forwards to end_element_namespace.
           def end_element(name)
               end_element_namespace(name)
           end
       end
       
       # HtmlCallbacks variant that additionally holds an output destination
       # (@dest). After the inherited element handling it calls
       # print_para_start / print_para_end, which are not defined in this
       # class (presumably supplied by the OutputCallback mixin -- confirm).
       class HtmlCallbacksOutput < HtmlCallbacks
           # dest - object that receives output; only #print is called on it here
           # sub  - forwarded unchanged to HtmlCallbacks#initialize
           def initialize(dest, sub)
               super(sub)
               @dest = dest
           end

           include Spongiae::Util::XmlCallbacks::OutputCallback

           # Runs the inherited open-element handling, then print_para_start
           # with the same (unconverted) arguments.
           def start_element(name, attrs = [])
               super
               print_para_start(name, attrs)
           end

           # Runs the inherited close-element handling, then print_para_end.
           def end_element(name)
               super
               print_para_end(name)
           end

           # Character data: appended to @text while @path is set, otherwise
           # printed straight to @dest.
           # NOTE(review): HtmlCallbacks initialises @path to '' and the code
           # visible in this file only ever assigns strings to it, so the
           # print branch looks unreachable unless a mixin sets @path to nil
           # -- confirm the intended condition.
           def characters(text)
               if @path.nil?
                   @dest.print(text)
               else
                   @text += text
               end
           end
       end
       
       # An HTML file on disk that can be scanned with HtmlCallbacks or
       # re-emitted through HtmlCallbacksOutput.
       class HtmlDocument

           # file  - path of the source HTML file
           # props - accepted but not used by this class
           def initialize(file, props = {})
               @file = file
           end

           # Parses the source file with an HtmlCallbacks handler built around
           # the given block. How and when the block is invoked is decided by
           # the handler, not here.
           def read_unit(&sub)
               parse_source(HtmlCallbacks.new(sub))
           end

           # Parses the source file with an HtmlCallbacksOutput handler that
           # writes to +dest_file_name+ (opened for writing as UTF-8).
           # translations_map - handed to OutputCallback.translation_proc
           #                    together with the open destination
           # props            - accepted but not used
           def translate(dest_file_name, translations_map, props = {})
               File.open(dest_file_name, 'w:UTF-8') do |dest|
                   translator = Spongiae::Util::XmlCallbacks::OutputCallback.translation_proc(translations_map, dest)
                   parse_source(HtmlCallbacksOutput.new(dest, translator))
               end
           end

           private

           # Feeds the whole source file, read in binary mode, to an HTML SAX
           # parser driven by +handler+.
           def parse_source(handler)
               Nokogiri::HTML::SAX::Parser.new(handler).parse(File.read(@file, mode: 'rb'))
           end
       end
       
   end
end
