
# ------ Extend class String so that it can behave like a TaggedString
class String
    # XML-escaped copy of this string, using Ruby's text-node escaping.
    def to_native_xml
        encode(:xml => :text)
    end

    # A plain string carries no tags, so every export format is simply the
    # escaped text; the keyword arguments are accepted and ignored.
    def to_xliff(version: 1, segmented: true)
        to_native_xml
    end

    def to_tmx(tags: false)
        to_native_xml
    end

    def to_html
        to_native_xml
    end

    # Nothing to restore on a plain string: the receiver is returned as is.
    def restore_tags(unit)
        self
    end

    # A plain string never holds placeables.
    def placeables
        []
    end

    # Already tag-free, so the receiver itself is returned.
    def untagged
        self
    end
end


module Spongiae
   module Tags    
       
       # Base class of the tag classes; it only stores what kind of tag this is.
       class Tag
           # Kind of tag: +1 = opening, 0 = isolated, -1 = closing
           attr_reader :type
           attr_writer :type
       end
       
       # For XML and HTML
       # One markup tag. Besides the inherited type (+1 opening, 0 isolated,
       # -1 closing) it carries the element name, its attributes, an id that is
       # written into the XLIFF/TMX inline elements and an optional role.
       class MarkupTag < Tag
           attr_accessor :name, :attrs
           attr_accessor :id
           attr_accessor :role
           
           # Convert back to the original xml/html tag.
           # Attributes are written as ` key = "value"` and only for non-closing tags.
           # NOTE(review): an attribute whose value is nil is written as ` key = `
           # with nothing after the equals sign - confirm this is intended.
           def to_native_xml
               res = "<#{@name}"
               unless attrs == nil or @type < 0
                   attrs.each { |k,v| res << " #{k} = #{v == nil ? '' : v.encode(:xml => :attr)}" }
               end
               case @type
               when -1
                   res.insert(1, '/')
                   res << ">"
               when  0 then res << " />"
               when +1 then res << ">"
               end
               res
           end
           
           # Convert to an XLIFF inline element.
           # version:   below 2 produces XLIFF 1.x elements (g, x, bpt, ept),
           #            otherwise XLIFF 2 elements (pc, ph, sc, ec).
           # segmented: false/nil produces paired elements (<g>...</g>, <pc>...</pc>);
           #            any other value produces standalone begin/end markers. For
           #            version 1 a value containing "embed" additionally embeds the
           #            original tag, with < > turned into { }, inside bpt/ept.
           # Returns nil when the tag type is none of -1, 0, +1.
           def to_xliff(version: 1, segmented: true)
               params = "id=\"#{@id}\""
               params = params + " ctype=\"#{@role}\"" if @role != nil and version < 2
               params = params + " type=\"fmt\" subType=\"xlf:#{@role}\"" if @role != nil and version >= 2
               if version < 2 then
                   # segmented may be true/false as well as a string: go through
                   # to_s because booleans do not respond to =~ (removed from
                   # Object in Ruby 3.2).
                   embed = segmented.to_s =~ /embed/
                   case @type
                   when -1
                       if !segmented then "</g>"
                       elsif embed then "<ept id=\"#{@id}\">#{embedded_markup}</ept>"
                       else "<ept id=\"#{@id}\" />"
                       end
                   when  0
                       "<x #{params} />"
                   when +1
                       if !segmented then "<g #{params}>"
                       elsif embed then "<bpt id=\"#{@id}\">#{embedded_markup}</bpt>"
                       else "<bpt id=\"#{@id}\" />"
                       end
                   end
               else
                   case @type
                   when -1 then segmented ? "<ec id=\"#{@id}\" />" : "</pc>"
                   when  0 then "<ph #{params} />"
                   when +1 then segmented ? "<sc #{params} />" : "<pc #{params}>"
                   end
               end
           end
           
           # Convert to a TMX inline element.
           # tags: false or "level 1"            -> '' (tag dropped)
           #       true, "level 2" or "...embed" -> bpt/ept with the original tag embedded
           #       "...empty..."                 -> empty bpt/ept
           #       "...OmegaT..."                -> the native tag with < and > escaped
           # Any other value returns nil.
           def to_tmx(tags: true)
               # case/when relies on Regexp#===, which is simply false for
               # non-string values instead of raising.
               case tags
               when false, /^level\s*1$/i then ''    # no tags
               when true, /^level\s*2$/i, /embed/i then tmx_inline('embed')
               when /empty/i then tmx_inline('empty')
               when /OmegaT/i then to_native_xml().gsub('<','&lt;').gsub('>','&gt;')
               end
           end
           
           # Simplified HTML rendering: paired tags become an element named after
           # the first character of the role (<em> when there is no role), an
           # isolated tag is shown escaped in gray.
           def to_html()
               case @type
               when -1 then @role != nil ? "</#{@role[0]}>" : "</em>"
               when  0 then "<font color=gray>#{to_native_xml().encode(:xml => :text)}</font>"
               when +1 then @role != nil ? "<#{@role[0]}>" : "<em>"
               end
           end
           
           # The native tag with < > replaced by { } and escaped as XML text.
           def embedded_markup
               to_native_xml().tr('<>','{}').encode(:xml => :text)
           end
           
           # XLIFF 1 rendering rewritten for TMX: id="n" on bpt/ept becomes
           # i="n" x="n". Back-references in a Ruby replacement string are \1 and
           # \2, and the id capture stops at the first quote so that quotes
           # inside embedded markup are not swallowed.
           # NOTE(review): an isolated tag still comes out as the XLIFF <x .../>
           # element here - confirm whether TMX output should use <ph> instead.
           def tmx_inline(mode)
               to_xliff(version: 1, segmented: mode).gsub(/<ph id/, '<ph x').gsub(/<([be]pt) id="([^"]*)"/, '<\1 i="\2" x="\2"')
           end
           private :embedded_markup, :tmx_inline
       end
       
       # A non-translatable piece of content identified by an id. It is exported
       # as a <ph> element, or shown in gray in the HTML rendering.
       class PlaceHolder
           attr_accessor :contents, :id
           
           def initialize(contents,id) @contents = contents; @id = id end
               
           # The raw contents, unescaped.
           def to_native_xml() @contents end
           
           # <ph id="..."> holding the contents with < > turned into { } and
           # escaped as XML text. Both keyword arguments are accepted and ignored.
           def to_xliff(version: 1, segmented: true) 
               "<ph id=\"#{@id}\">#{braced_contents}</ph>"
           end
           
           # tags: false/nil      -> the escaped contents, without a <ph> wrapper
           #       "...empty..."  -> an empty <ph x="id" />
           #       anything else  -> <ph x="id"> holding the braced contents
           def to_tmx(tags: true) 
               return to_native_xml().encode(:xml => :text) unless tags
               # tags defaults to true, and booleans do not respond to =~
               # (removed from Object in Ruby 3.2), hence the to_s.
               return "<ph x=\"#{@id}\" />" if tags.to_s =~ /empty/i
               "<ph x=\"#{@id}\">#{braced_contents}</ph>"
           end
           
           # Escaped contents shown in gray.
           def to_html()
               "<font color=gray>#{to_native_xml().encode(:xml => :text)}</font>"
           end
           
           # Contents with < > replaced by { }, then escaped as XML text.
           def braced_contents
               to_native_xml().tr('<>','{}').encode(:xml => :text)
           end
           private :braced_contents
       end
       
       # Anything which should not be translated. Can be a tag or something else
       # Pairs such an item with pos, its offset in the text it belongs to.
       # Every conversion is forwarded unchanged to the wrapped item.
       class Placeable
           attr_accessor :pos, :item
           
           def initialize(pos,item)
               @pos = pos
               @item = item
           end
           
           def to_native_xml
               @item.to_native_xml
           end
           
           def to_xliff(version: 1, segmented: true)
               @item.to_xliff(version: version, segmented: segmented)
           end
           
           def to_tmx(tags: false)
               @item.to_tmx(tags: tags)
           end
           
           def to_html
               @item.to_html()
           end
       end
       
       # A text together with the placeables (tags, placeholders) attached to it.
       # Each placeable records in pos the character offset of text before which
       # its item is inserted. to_any_xml walks the list backwards and therefore
       # assumes the placeables are kept in ascending pos order.
       class TaggedString
           attr_accessor :text, :placeables
           
           def initialize(text, placeables = []) @text = text; @placeables = placeables end
               
           # Methods which make this class compatible with Culter
           def clone() TaggedString.new(@text.clone, @placeables.clone) end               
           def force_encoding(code) @text.force_encoding(code) end
           def =~(a) @text =~ a end
           
           # Builds the replacement text for one match and moves the placeables.
           # match:      MatchData of the current match
           # b:          replacement template; \N is replaced by capture group N
           # placeables: the placeables to move (modified in place)
           # origins:    pos of each placeable before ANY replacement took place.
           #             match offsets always refer to the untouched text, so the
           #             comparison must use these and not the already shifted pos.
           def ev_sub(match, b, placeables, origins = placeables.collect { |item| item.pos })
               res = b.gsub(/\\(\d+)/) { match[$1.to_i] }
               first = match.begin(0)
               last = match.end(0)
               diff = match[0].length - res.length
               placeables.each_with_index do |item, i|
                   at = origins[i]
                   if at >= last
                       # at or after the end of the match: follows the text behind it
                       item.pos = item.pos - diff
                   elsif at > first + res.length
                       # inside a match that became shorter: keep it at the end
                       # of the replacement instead of past it
                       item.pos = item.pos - (at - (first + res.length))
                   end
               end
               res
           end
           
           # Non-regexp patterns are interpolated into a regexp (not escaped).
           def as_regexp(a)
               a.is_a?(Regexp) ? a : %r{#{a}}
           end
           
           # Type used to decide on which side of a segment boundary an item
           # goes. Items without a type are treated as isolated (0).
           # NOTE(review): assumes type-less items such as PlaceHolder should
           # behave like isolated tags - confirm.
           def boundary_type(item)
               return 0 unless item.respond_to?(:type)
               item.type || 0
           end
           private :ev_sub, :as_regexp, :boundary_type
           
           # Like String#sub with a \N template; returns a new TaggedString with
           # shifted copies of the placeables. The receiver is left untouched.
           def sub(a,b)
               a = as_regexp(a)
               text1 = @text.dup        # dup, not clone: clone would keep a frozen flag
               placeables1 = @placeables.collect { |item| item.clone }
               origins = placeables1.collect { |item| item.pos }
               text1.sub!(a) { ev_sub(Regexp.last_match, b, placeables1, origins) }
               TaggedString.new(text1, placeables1)                
           end
           
           # In-place variant of sub. Returns the offset of the match in the
           # text as it was before the replacement, or nil without a match.
           def sub!(a,b)
               a = as_regexp(a)
               text1 = @text.dup
               origins = @placeables.collect { |item| item.pos }
               @text.sub!(a) { ev_sub(Regexp.last_match, b, @placeables, origins) }
               text1 =~ a
           end
           
           # Like String#gsub with a \N template; returns a new TaggedString with
           # shifted copies of the placeables. The receiver is left untouched.
           def gsub(a,b)
               a = as_regexp(a)
               text1 = @text.dup
               placeables1 = @placeables.collect { |item| item.clone }
               origins = placeables1.collect { |item| item.pos }
               text1.gsub!(a) { ev_sub(Regexp.last_match, b, placeables1, origins) }
               TaggedString.new(text1, placeables1)                
           end
           
           # In-place variant of gsub. Returns what String#gsub! returns:
           # the text, or nil when nothing matched.
           def gsub!(a,b)
               a = as_regexp(a)
               origins = @placeables.collect { |item| item.pos }
               @text.gsub!(a) { ev_sub(Regexp.last_match, b, @placeables, origins) }
           end
           
           # Produce tagged strings, with tags at correct location.
           # The text is split with String#split; each segment is then located
           # again in the text by searching forward from the end of the
           # previous segment, and receives the placeables falling inside it.
           def split(regex)
               cursor = 0
               @text.split(regex).collect do |seg| 
                   found = @text.index(seg, cursor)
                   cursor = found unless found.nil?
                   ts = TaggedString.new seg, extract_placeables(cursor, seg.length)
                   cursor = cursor + seg.length
                   ts
               end 
           end
           
           # Splits exactly like split and yields every segment to the block;
           # returns the collected block results.
           def scan(regex,&action)
               split(regex).collect { |ts| yield ts }
           end
           
           # Placeables belonging to the segment text[start, len], with pos made
           # relative to the segment. On the left edge only items of type >= 0
           # are kept, on the right edge only closing ones (type < 0); anything
           # at offset 0 of the whole text goes to the segment starting there.
           def extract_placeables(start,len)
               stop = start + len
               inside = @placeables.select do |pl| 
                   if start == 0 and pl.pos == 0 then true
                   elsif pl.pos > start and pl.pos < stop then true
                   elsif pl.pos == start then boundary_type(pl.item) >= 0
                   elsif pl.pos == stop then boundary_type(pl.item) < 0
                   else false
                   end
               end
               inside.collect { |pl2| Placeable.new(pl2.pos - start, pl2.item) }
           end
           
           # Concatenation with a String or another TaggedString. The result
           # gets its own placeables array; placeables of a TaggedString operand
           # are shifted by the length of this text.
           # Raises RuntimeError for any other operand.
           def +(st)
               return TaggedString.new(@text + st, @placeables.clone) if st.is_a? String 
               if st.is_a? TaggedString
                   newText = @text + st.text
                   newPlaceables = @placeables.clone
                   st.placeables.each { |pl| newPlaceables << Placeable.new(pl.pos + @text.length, pl.item) }
                   return TaggedString.new(newText, newPlaceables)
               end
               raise "Cannot do this operation"
           end
           def length() @text.length end
           
           # Convert to string with tags.
           # The block receives each placeable's item and returns the markup to
           # insert at its position; the text between placeables is escaped as
           # XML text. Works from the last placeable to the first, so that
           # insertions never move the offsets still to be processed.
           def to_any_xml(&sub)               
               phrase = @text
               return phrase.encode(:xml => :text) if placeables.empty?
               
               phrase = phrase.dup      # dup, not clone: the copy is modified below
               prev = nil
               placeables.reverse_each do |item|
                   span = prev == nil ? phrase.length : prev - item.pos
                   phrase[item.pos,span] = phrase[item.pos,span].encode(:xml => :text)
                   phrase.insert(item.pos,sub.call(item.item)) 
                   prev = item.pos
               end
               phrase[0,prev] = phrase[0,prev].encode(:xml => :text) if prev != nil
               phrase
           end
           def to_native_xml() to_any_xml { |item| item.to_native_xml } end
           def to_xliff(version: 1, segmented: true) to_any_xml { |item| item.to_xliff(version: version, segmented: segmented) }  end
           def to_tmx(tags: false) to_any_xml { |item| item.to_tmx(tags: tags) }  end
           def to_html() to_any_xml { |item| item.to_html() }  end
           # Remove tags
           def untagged() @text end
               
           # Synchronise tags between source and target.
           # Every item of this string that has no name takes the name (and,
           # unless the source tag is a closing one, the attributes) of the
           # source items with the same id. Items that have no name accessor
           # are left alone. Returns self.
           def restore_tags(unit) 
               oriTags = unit.srcText.placeables
               @placeables.each do |tra|
                   next unless tra.item.respond_to?(:name) && tra.item.name == nil
                   oriTags.each do |ori|
                       next unless ori.item.respond_to?(:name)
                       next unless ori.item.id.to_s == tra.item.id.to_s
                       tra.item.name = ori.item.name 
                       tra.item.attrs = ori.item.attrs if ori.item.type >= 0
                   end
               end
               self
           end
       end
       
       # Returns part with everything matching a closing tag (</name>) removed.
       # Opening tags are not touched.
       def self.untag(part)
           closing_tag = /<\/\w+>/
           part.gsub(closing_tag, '')
       end
   end
end
