require 'anguilla'

require 'spongiae/tags'

module Spongiae
   module XLIFF
       
       ##
       # Gets only meta-info about a file, by reading just its beginning.
       # Uses regexes because the anguilla parser would read the full file.
       def self.read_meta(file,encoding = 'UTF-8')
           # Scans +file+ (opened read-only with +encoding+) line by line and
           # collects the attributes found so far:
           #   'original' <- original="..."
           #   'srcLang'  <- source-language="..."
           #   'traLang'  <- target-language="..."
           # Reading stops as soon as both languages are known.
           # Returns the hash of what was found: it may be partial (or empty)
           # when the end of file is reached first, but it is never nil.
           res = {}
           File.open(file, "r:#{encoding}") do |f|
               f.each_line do |line|
                   res['original'] = $1 if line =~ /\soriginal\s*=\s*["'](.+?)["']/
                   res['srcLang'] = $1 if line =~ /\ssource-language\s*=\s*["'](.+?)["']/
                   res['traLang'] = $1 if line =~ /\starget-language\s*=\s*["'](.+?)["']/
                   # Both languages known: no need to read the rest of the file
                   break if res['srcLang'] and res['traLang']
               end
           end
           res
       end
       
       ##
       # Reads all units, but only the translation. 
       def self.read_translations_map(file)
           # Listener built with format 'tra' and filter true; with no action
           # given, it stores what it collects in its own result hash.
           listener = Xliff_Callback.new('tra', true)
           Anguilla.parse(file, listener)
           listener.result
       end
       
       ##
       # Generic function to browse units from an XLIFF file
       # Params:
       # +input+::   where to read data from (file, io, etc. see Anguilla for details)
       # +filter+::  whether we want to work (i.e. do an action) with all units or not
       #    - false:    do not filter, work on all units
       #    - true (default):   for compatibility with older releases, equivalent to 'translated'
       #    - 'translated':     work only with already translated units
       #    - 'untranslated':  work only with non-translated units
       #    - any block:    work only with units for which block returns true
       # +action+::     block to be called for units where filter is true
       #    second parameter is translation unit id; first parameter depends on format parameter
       #    if not given, put results in an hash indexed by translation unit id and return it
       # +format+::     which first parameter to be sent to action
       #    'unit': sends a Spongiae::Unit::Bilingual
       #    'src':  sends the source text (string or Spongiae::Tags::TaggedString)
       #    'tra':  sends the translation text (string or Spongiae::Tags::TaggedString)
       #    'last': sends translation if segment is translated, source instead
       # +segment+::     whether we should call action for each segment (true) or for each unit (false, default)
       def self.read_units(input,filter=true,format=:unit,segment=false,&action)
           # The segment flag only selects which listener class does the work;
           # both receive the same (format, filter, action) triple.
           listener_class = segment ? Xliff_Seg_Callback : Xliff_Callback
           listener = listener_class.new(format, filter, action)
           Anguilla.parse(input, listener)
           listener.result
       end
       
       
       # Stream listener (REXML::StreamListener) which rebuilds, unit by unit,
       # the source and target text of an XLIFF document and hands every unit
       # accepted by the filter to an action block.
       # Without action, accepted units are stored in #result, indexed by unit id
       # (prefixed by "<original>!!" once a second <file> element has been seen).
       class Xliff_Callback
           include REXML::StreamListener
           
           ##
           # Builds callback. See Spongiae::XLIFF::read_units for meaning of parameters
           # Raises RuntimeError when filter is not one of the supported values.
           def initialize(format,filter = false,action = nil)
               @result = {}; @multi_file = false
               # tries to remain compatible with previous versions,
               # where first parameter could be the action or a boolean
               if format.is_a? Proc then
                   @action = format; @format = :unit
               elsif format == true then
                   @format = :unit
               elsif format == false then
                   @format = :tra  # compatibility with previous versions
               else
                   @format = format
               end
               @action = action if @action == nil
               # Default action: store in result hash
               @action = Proc.new { |el,id| @result[id] = el } if @action == nil
               @filter = build_filter(filter)
           end
           
           attr_reader :result
           
           ##
           # Converts the filter parameter to a Proc (called without parameters).
           # Type is tested before any regex match: booleans and procs do not
           # respond to =~ in Ruby >= 3.2, so matching them raised NoMethodError.
           def build_filter(filter)
               return filter if filter.is_a? Proc
               return Proc.new { true } if filter == false
               spec = (filter == true) ? 'translated' : filter
               unless spec.is_a?(String) or spec.is_a?(Symbol)
                   raise "Do not know what to do with filter #{filter}"
               end
               spec = spec.to_s
               nullable = (spec =~ /nullable/) != nil   # nullable: empty target counts as translated
               if spec =~ /^tra/ then
                   if nullable then
                       Proc.new { (@cur_tra != nil) }
                   else
                       Proc.new { (@cur_tra != nil) and (@cur_tra.length > 0) }
                   end
               elsif spec =~ /^untra/ then
                   if nullable then
                       Proc.new { (@cur_tra == nil) }
                   else
                       Proc.new { (@cur_tra == nil) or (@cur_tra.length == 0) }
                   end
               else
                   raise "Do not know what to do with filter #{filter}"
               end
           end
           private :build_filter
           
           ##
           # Opening tag: resets buffers for units and source/target,
           # registers a placeable for inline elements (g, x, bpt, ept)
           def tag_start(element, attributes)
               if element == 'trans-unit' or element == 'unit' then 
                   @cur_unit = attributes['id']
                   @cur_text = nil
                   @cur_source = ''
                   @cur_tra = nil   # remains nil until there is at least one <target>
               elsif element == 'file' then 
                   @multi_file = true if @cur_subfile != nil 
                   @cur_subfile = attributes['original']
               elsif (element == 'target') or (element == 'source') 
                   @cur_text = ''; @cur_tags = []; @stack_tags = []
                   @cur_tra = '' if (element == 'target') and (@cur_tra == nil)   # was nil to make distinction between no target and target empty 
               elsif element == 'g' then
                   tag = Spongiae::Tags::MarkupTag.new; tag.type = 1; tag.id = attributes['id']         
                   @cur_tags <<  Spongiae::Tags::Placeable.new(@cur_text.length, tag)
                   @stack_tags << tag.id    # id is needed again when </g> is reached
               elsif element == 'x' then
                   tag = Spongiae::Tags::MarkupTag.new; tag.type = 0; tag.id = attributes['id']         
                   @cur_tags <<  Spongiae::Tags::Placeable.new(@cur_text.length, tag)
               elsif element == 'bpt' then
                   tag = Spongiae::Tags::MarkupTag.new; tag.type = +1
                   tag.id = attributes['rid']; tag.id = attributes['id'] if tag.id == nil
                   @cur_tags <<  Spongiae::Tags::Placeable.new(@cur_text.length, tag)
                   @save_cur_text = @cur_text; @cur_text = nil # do not include contents 
               elsif element == 'ept' then
                   tag = Spongiae::Tags::MarkupTag.new; tag.type = -1
                   tag.id = attributes['rid']; tag.id = attributes['id'] if tag.id == nil
                   @cur_tags <<  Spongiae::Tags::Placeable.new(@cur_text.length, tag)
                   @save_cur_text = @cur_text; @cur_text = nil # do not include contents 
               end
           end
           
           ##
           # Calls the action for current unit, if the filter accepts it.
           # First parameter sent to the action depends on format
           # (see Spongiae::XLIFF::read_units), second one is the unit id.
           # Raises RuntimeError for an unknown format.
           def yield_next_unit(unit_id)
                return unless @filter.call()
                id = unit_id; id = "#{@cur_subfile}!!#{id}" if @multi_file
                fmt = @format.to_s      # format may be a symbol or a string
                if fmt =~ /unit/ then
                    param1 = Spongiae::Unit::Bilingual.new(@cur_subfile, unit_id, {}, @cur_source, @cur_tra)
                elsif fmt =~ /^s(ou)?rc/ then     # 'src' as well as 'source'
                    param1 = @cur_source
                elsif fmt =~ /^t(arget|ra)/ then
                    param1 = @cur_tra
                elsif fmt =~ /^last/ then
                    if @cur_tra == nil then param1 = @cur_source else param1 = @cur_tra end
                else
                    raise "Do not know what to do with format = #{@format}"
                end
                @action.call(param1,id)
           end
           protected :yield_next_unit
           
           ##
           # Closing tag: stores collected text as source or target (as a
           # TaggedString if inline tags were met), yields the unit when it ends
           def tag_end(element)
               if element == 'source'
                   @cur_source = @cur_source + @cur_text
                   @cur_source = Spongiae::Tags::TaggedString.new(@cur_source,@cur_tags) if @cur_tags != nil and @cur_tags.count > 0
               elsif element == 'target'
                   @cur_tra = @cur_tra + @cur_text
                   @cur_tra = Spongiae::Tags::TaggedString.new(@cur_tra,@cur_tags) if @cur_tags != nil and @cur_tags.count > 0
               elsif element == 'trans-unit' or element == 'unit' then 
                   yield_next_unit(@cur_unit)
                   @cur_source = @cur_tra = nil
               elsif element == 'g' then
                   tag = Spongiae::Tags::MarkupTag.new; tag.type = -1; tag.id = @stack_tags.pop
                   @cur_tags <<  Spongiae::Tags::Placeable.new(@cur_text.length, tag)
               elsif (element == 'bpt') or (element == 'ept') then
                   # contents of the tag were skipped, restart collecting text
                   @cur_text = @save_cur_text; @save_cur_text = nil
               end
           end
           
           ##
           # Character data: collected only while a text buffer is active
           def text(text) 
               if @cur_text != nil then @cur_text = @cur_text + text end
           end
       end
       
       # Callback which launches action for each segment, not each unit
       class Xliff_Seg_Callback < Xliff_Callback
           # Segments are yielded with id "<unit id>/<segment id>", where segment id
           # is the mid (XLIFF 1 <mrk mtype="seg">) or id (XLIFF 2 <segment>) attribute,
           # or the position of the segment (starting from 1) when attribute is missing.
           # XLIFF 1 units without <seg-source> segments are yielded as a whole unit.
           def initialize(format,filter = false,action = nil)
               super(format,filter,action)
           end
           
           def tag_start(element, attributes)
            # ------- XLIFF 1
               if element == 'trans-unit' then 
                   @cur_seg = 0; @mrk_count = 0
                   @sub_src = Hash.new; @sub_tra = Hash.new
                   super(element, attributes)   
               elsif element == 'seg-source' then    
                   @in_seg_source = true
                   @mrk_count = 0      # positions restart, to be the same in source and target
               elsif element == 'target' then
                   @mrk_count = 0      # positions restart, to be the same in source and target
                   super(element, attributes)
               elsif element == 'mrk' then
                   if attributes['mtype'] == 'seg' then
                        @mrk_count = @mrk_count.to_i + 1
                        # Segment id is only a key here: unit id is completed when unit ends,
                        # else ids of all previous segments would accumulate in @cur_unit
                        @cur_seg = attributes.key?('mid') ? attributes['mid'] : @mrk_count
                        @mrk_level = 0
                        super(@in_seg_source ? 'source' : 'target', attributes) # init new segment
                        @cur_source = ''
                   else
                       # nested non-segment marker: only count depth to find the end of the segment
                       @mrk_level += 1 if @mrk_level != nil
                   end                   
            # ------- XLIFF 2
               elsif element == 'unit' then 
                   @cur_seg = 0; @seg_count = 0
                   super(element, attributes)   
               elsif element == 'segment' then     
                   @seg_count = @seg_count.to_i + 1
                   @cur_seg = attributes.key?('id') ? attributes['id'] : @seg_count
                   # Keep id of the unit apart, to restore it when segment ends
                   @unit_base = @cur_unit.to_s
                   @cur_unit = "#{@unit_base}/#{@cur_seg}"
                   @cur_text = nil
                   @cur_source = ''
                   @cur_tra = nil   # remains nil until there is at least one <target>
               elsif element == 'ignorable' then
                   # Not a segment, never yielded; but its <source>/<target> are collected
                   # by parent class, which needs buffers not to be nil 
                   @cur_text = nil
                   @cur_source = ''
                   @cur_tra = nil
               else
                   super(element, attributes)
               end
           end
           
           def tag_end(element)
                if element == 'seg-source' then    # XLIFF 1
                    @in_seg_source = false
               elsif element == 'mrk' and @mrk_level != nil then
                    if @mrk_level > 0 then
                        @mrk_level -= 1
                    else    # 0 => in mtype = seg
                        if @in_seg_source then 
                            super('source')
                            @sub_src[@cur_seg] = @cur_source ; @cur_source = ''
                        else 
                            super('target')
                            @sub_tra[@cur_seg] = @cur_tra ; @cur_tra = nil
                        end
                        @mrk_level = nil
                    end
               elsif element == 'target' and @cur_tra == nil then
                    # Segmented target: each segment has already been stored when its <mrk> ended,
                    # there is no more buffer to which parent class could append text 
                elsif element == 'trans-unit' then
                    if @sub_src.empty? then
                       yield_next_unit(@cur_unit)
                    else
                        @sub_src.each_key do |key0|
                            @cur_source = @sub_src[key0]; @cur_tra = @sub_tra[key0]
                            # interpolation: key may be an integer (position) or a string (mid)
                            yield_next_unit("#{@cur_unit}/#{key0}")
                        end
                    end
               elsif element == 'segment' then     # XLIFF 2
                   # Use same algorithm normally reserved to units
                   # This will also reset all buffers so that next segment do not contain them
                   super('unit')
                   # Restore unit id without modifying the string sent to the action
                   @cur_unit = @unit_base
               elsif element == 'unit' then 
                   # Do not register unit, it always contains a segment treated by previous if 
               else
                   super(element)
               end
           end
       end
       
       # Common part of XLIFF writers: keeps the parameters and writes the XML declaration.
       class XliffWriter
          # Stores all parameters in instance variables, remembers +file+ as the
          # file currently being written, and sends the XML declaration to +target+.
          def initialize(target, file, options, culter, translations_map)
              @target = target
              @file = file
              @options = options
              @culter = culter
              @translations_map = translations_map
              @prev_file = file
              @target.puts '<?xml version="1.0" encoding="UTF-8"?>'
          end
          
          # Factory to choose between version 1 and 2:
          # no '--version' option, or a value below 2.0, gives an Xliff1Writer;
          # anything else gives an Xliff2Writer.
          def self.create(target, file, options, culter, translations_map)
              version = options['--version']
              writer_class = (version == nil || version.to_f < 2.0) ? Xliff1Writer : Xliff2Writer
              writer_class.new(target, file, options, culter, translations_map)
          end
       end
       
       # Writer for XLIFF 1.x: one <trans-unit> per unit, optionally segmented
       # (<seg-source> and <mrk mtype="seg">) when a culter is given and version is 1.2
       class Xliff1Writer < XliffWriter
          # Forces option '--version' to '1.2' unless it already looks like 1.x,
          # then opens <xliff> and the first <file><body>
          def initialize(target, file, options, culter, translations_map)
              super(target, file, options, culter, translations_map)
              version = options['--version']
              options['--version'] = '1.2' if version == nil or not (version =~ /1\.\d/)
              @target.puts "<xliff xmlns='urn:oasis:names:tc:xliff:document:#{options['--version']}'>"
              @target.puts "    <file original=\"#{file}\" #{xliff_lang_spec(options)}><body>"    
          end
          
          # Builds language attributes of <file>, using first option whose name
          # contains 'lang' and src/sourc (resp. tra/target). Empty if there are none.
          def xliff_lang_spec(options)
              attrs = []
              src_keys = options.keys.select { |item| item =~ /lang/i and item =~ /s(ou)?rc/ }
              attrs << " source-language='#{options[src_keys.first]}'" unless src_keys.count == 0
              tra_keys = options.keys.select { |item| item =~ /lang/i and item =~ /t(ra|arget)/ }
              attrs << " target-language='#{options[tra_keys.first]}'" unless tra_keys.count == 0
              attrs.join
          end
          
          # Writes one <trans-unit>, after having switched to a new <file> if unit comes from another file.
          # Translation is unit.traText if unit has this method, else it is searched in translations map.
          def write_unit(unit)
              unless unit.file == @prev_file or unit.file == nil
                  @target.puts '    </body></file>'
                  @target.puts "    <file original=\"#{unit.file}\" #{xliff_lang_spec(@options)}><body>"
                  @prev_file = unit.file
              end
              @target.puts "        <trans-unit id=\"#{unit.id}\">"
              trailer = ''    # XLIFF specification forces non-XLIFF elements to be at the end of the unit
              @target.puts "            <source>#{unit.srcText.to_xliff(version: 1, segmented: false)}</source>"
              # Segmentation is only written for 1.2
              segmenting = (@culter != nil and @options['--version'].to_s =~ /1\.2/)
              if segmenting then
                  src_pieces = @culter.cut(unit.srcText)
                  if src_pieces.count > 1 then
                      @target.print  "            <seg-source>"
                      src_pieces.each_with_index do |piece, idx|
                          if @options.key? '--detach-initial-blank' and piece.untagged =~ /^(\s+)/ then
                              # initial blank is written between segments, not inside
                              @target.print $1 
                              piece.sub!(/^(\s+)/,'')
                          end
                          @target.print "<mrk mtype=\"seg\" mid=\"#{idx + 1}\">#{piece.to_xliff(version: 1, segmented: 'embedded')}</mrk>"                  
                      end
                      @target.puts "</seg-source>"
                      if @options.key? '--culter-debug' then
                          trailer <<  "            <culter:debug xmlns:culter='http://culter.silvestris-lab.org/debug'>"
                          trailer << @culter.cut_debug(unit.srcText.untagged).gsub(/<(.+?):(true|false)>/, "<culter:rule id='\\1' cut='\\2' />")
                          trailer << "</culter:debug>"
                      end
                  end
              end
              tra = nil
              if unit.respond_to? 'traText'
                  tra = unit.traText
              elsif @translations_map[unit.id] != nil then
                  tra = @translations_map[unit.id]
              end
              if tra != nil
                  several = false
                  if segmenting then
                      tra_pieces = @culter.cut(tra)
                      several = tra_pieces.count > 1
                  end
                  if several then
                      @target.print  "            <target>"
                      tra_pieces.each_with_index do |piece, idx|
                          if @options.key? '--detach-initial-blank' and piece.sub!(/^(\s+)/,'') then
                              @target.print $1
                          end
                          @target.print "<mrk mtype=\"seg\" mid=\"#{idx + 1}\">#{piece.to_xliff(version: 1, segmented: @culter != nil)}</mrk>"                  
                      end
                      @target.puts "</target>"                    
                  elsif tra != '' then
                      @target.puts "            <target>#{tra.to_xliff(version: 1, segmented: @culter != nil)}</target>"
                  end
              end
              @target.puts trailer if trailer.length > 0   # XLIFF specification forces them to be at the end of the unit
              @target.puts '        </trans-unit>'
          end
          
          # Closes last <file> and the document
          def close
              @target.puts '    </body></file>'
              @target.puts '</xliff>'
          end
       end
       
       # Writer for XLIFF 2.x: one <unit> per unit, containing one <segment>
       # per segment found by the culter (or only one if there is no culter)
       class Xliff2Writer < XliffWriter
          # Forces option '--version' to '2.0' unless it already looks like 2.x,
          # then opens <xliff> (which contains the languages in version 2) and the first <file>
          def initialize(target, file, options, culter, translations_map)
              super(target, file, options, culter, translations_map)
              options['--version'] = '2.0' unless options['--version'] != nil and options['--version'] =~ /2\.\d/
              @target.puts '<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="' + options['--version'] + '"  xmlns:fs="urn:oasis:names:tc:xliff:fs:2.0" ' + xliff_lang_spec(options) + '>'
              # No <body> here: it was never closed (close and write_unit only write </file>)
              # and this element does not exist in XLIFF 2
              @target.puts "    <file original=\"#{file}\">"
          end
          
          # Builds language attributes of <xliff>, using first option whose name
          # contains 'lang' and src/sourc (resp. tra/target). Empty if there are none.
          def xliff_lang_spec(options)
              res = ''
              srcKey = options.keys.select { |item| item =~ /lang/i and item =~ /s(ou)?rc/ }
              res = res + " srcLang='#{options[srcKey[0]]}'" unless srcKey.count == 0
              traKey = options.keys.select { |item| item =~ /lang/i and item =~ /t(ra|arget)/ }
              res = res + " trgLang='#{options[traKey[0]]}'" unless traKey.count == 0
              return res 
          end
          
          # Writes one <unit>, after having switched to a new <file> if unit comes from another file.
          # Translation is unit.traText, if unit has this method and it is not nil.
          # NOTE(review): contrarily to Xliff1Writer, translations map is not used here - confirm this is intended
          def write_unit(unit)
              if unit.file != @prev_file and unit.file != nil then
                  @target.puts '    </file>'
                  @target.puts "    <file original=\"#{unit.file}\">"
                  @prev_file = unit.file
              end
              @target.puts "        <unit id=\"#{unit.id}\">"
              tra_text = unit.respond_to?('traText') ? unit.traText : nil
              if @culter != nil
                  segments = @culter.cut(unit.srcText)
                  if @options.key?('--culter-debug') and segments.count > 1 then    # in XLIFF 2, must be before segments themselves
                     @target.print  "            <culter:debug xmlns:culter='http://culter.silvestris-lab.org/debug'>"
                     @target.print @culter.cut_debug(unit.srcText).gsub(/<(.+?):(true|false)>/, "<culter:rule id='\\1' cut='\\2' />")
                     @target.puts "</culter:debug>"
                  end                                    
                  tra = (tra_text != nil) ? @culter.cut(tra_text) : nil
                  segments.each_with_index do |seg, i|
                      # Translation may have less segments than source: index must be strictly lower than count
                      tra_seg = (tra != nil and i < tra.count) ? tra[i] : nil
                      if @options.key?('--detach-initial-blank') and seg.untagged =~ /\A(\s+)/ then
                          @target.puts '            <ignorable>'
                          @target.puts "               <source>#{$1}</source>" 
                          # Remove only what has been written as ignorable: blank at the beginning
                          # of the segment, not at the beginning of each of its lines
                          seg.sub!(/\A(\s+)/,'')
                          if tra_seg != nil and tra_seg.untagged =~ /\A(\s+)/ then
                              @target.puts "               <target>#{$1}</target>"
                              tra_seg.sub!(/\A(\s+)/,'')
                          end
                          @target.puts '            </ignorable>'
                      end
                      @target.puts '            <segment>'
                      @target.puts "                <source>#{seg.to_xliff(version: 2, segmented: false)}</source>"
                      @target.puts "                <target>#{tra_seg.to_xliff(version: 2, segmented: false)}</target>" if tra_seg != nil
                      @target.puts '            </segment>'
                  end
              else  # in XLIFF 2 there must be at least one segment
                  @target.puts '            <segment>'
                  @target.puts "                <source>#{unit.srcText.to_xliff(version: 2, segmented: false)}</source>"
                  @target.puts "                <target>#{tra_text.to_xliff(version: 2, segmented: false)}</target>" if tra_text != nil
                  @target.puts '            </segment>'
              end
              @target.puts '        </unit>'
          end
          
          # Closes last <file> and the document
          def close
              @target.puts '    </file>'
              @target.puts '</xliff>'
          end
       end       
       
   end
end
