#! /usr/bin/env ruby 

# Shape of a reader-specific option: --fmt:<name>=<value>.
# \A and \z (with /m) make the pattern cover the whole argument, so an argument
# is never recognised because only one of its lines looks like an option.
FORMAT_OPTION_PATTERN = /\A--fmt:([\w\-]+)=(.+)\z/m

# Takes every --fmt:<name>=<value> argument out of +argv+.
#
# argv - Array of command-line arguments; matching entries are removed in place.
#
# Returns a Hash mapping each option name to its value (both Strings). When the
# same name is given several times the last value wins.
def extract_format_options(argv)
    extracted = {}
    argv.reject! do |item|
        match = FORMAT_OPTION_PATTERN.match(item)
        extracted[match[1]] = match[2] if match
        match       # truthy => the argument is dropped from argv
    end
    extracted
end

# Done before the regular option parsing, so that only the other switches and
# the file name remain in ARGV.
formatOptions = extract_format_options(ARGV)

# Nothing left on the command line: show the expected syntax and stop.
if ARGV.empty?
    usage = "Syntax: #{__FILE__} [--fmt:option=value]{0,*} <native original>"
    puts usage
    exit
end

$LOAD_PATH << "#{File.dirname(__FILE__)}/../lib"                # For non-standard installation

require 'getoptlong'


# Switches accepted on the command line. Each entry lists the option names
# followed by the kind of argument the option takes.
option_specs = [
    [ '--seg', '-s', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--src-lang', '--srcLang', '-o', GetoptLong::OPTIONAL_ARGUMENT ],
    [ '--tra-lang', '--traLang', '-t', GetoptLong::OPTIONAL_ARGUMENT ],
    [ '--dest-file', '--destFile', '-d', GetoptLong::OPTIONAL_ARGUMENT ],
    # Version of XLIFF format to be created
    [ '--version', '--xliff-version', '-v', GetoptLong::OPTIONAL_ARGUMENT ],
    # During segmentation, consider starting blank outside current segment
    [ '--detach-initial-blank', '-b', GetoptLong::NO_ARGUMENT ],
    [ '--culter-debug', '-c', GetoptLong::NO_ARGUMENT ],
]

# Collect every switch found in ARGV as name => value.
options = {}
GetoptLong.new(*option_specs).each do |name, value|
    options[name] = value
end

# Segmenter handed to the XLIFF writer; stays nil when no segmentation is requested.
culter = nil

# Builds a segmenter from the SRX rules file +srx_path+.
# When +src_lang+ is nil or empty, a notice is printed and the segmenter is
# requested for 'x-unknown' instead.
# culter/srx is required here, on first use, so that Culter is only needed
# when segmentation is actually asked for.
load_srx_segmenter = lambda do |srx_path, src_lang|
    require 'culter/srx'
    document = Culter::SRX::SrxDocument.new(srx_path)
    if src_lang.nil? || src_lang.empty?
        segmenter = document.segmenter('x-unknown')
        puts "SRX found, but missing source language, segmentation will be based on common rules"
        segmenter
    else
        document.segmenter(src_lang)
    end
end

# Load culter module only if required, so Culter is not mandatory to use Spongiae.
if options['--seg'] =~ /^simple$/
    require 'culter/simple'
    culter = Culter::Simple.create(keeps_spaces: true)
elsif options['--seg'] =~ /\.srx$/
    culter = load_srx_segmenter.call(options['--seg'], options['--src-lang'])
elsif options.key?('--culter-debug') && !options.key?('--seg')
    # --culter-debug without --seg is an error, but an OmegaT project around the
    # file can supply the rules. The project is looked for one level above the
    # directory of the first remaining argument.
    project_dir = ARGV[0] ? "#{File.dirname ARGV[0]}/.." : nil
    unless project_dir && File.exist?("#{project_dir}/omegat.project")
        puts "Cannot use option --culter-debug without segmentation rules"
        exit 1
    end

    puts "OmegaT project detected, try to use OmegaT's segmentation rules"
    project_rules = "#{project_dir}/omegat/segmentation.srx"
    if File.exist?(project_rules)
        options['--seg'] = project_rules
        puts "Using project-specific rules: #{options['--seg']}"
    else
        # No project-specific rules: fall back to the ones in the user's home directory.
        options['--seg'] = "#{Dir.home}/.omegat/segmentation.srx"
        unless File.exist?(options['--seg'])
            puts "OmegaT project detected but cannot find OmegaT options. Exit"
            exit 1
        end
    end
    culter = load_srx_segmenter.call(options['--seg'], options['--src-lang'])
end


# The first argument left in ARGV is the native file to convert.
file = ARGV.shift
if file.nil?
    # The command line held only switches, so there is nothing to convert.
    puts "Syntax: #{__FILE__} [--fmt:option=value]{0,*} <native original>"
    exit 1
end

require 'spongiae/formats/all'
# Fallback used when nothing loaded so far gave String a to_xliff method
# (i.e. tags are not loaded): the text is simply XML-escaped.
unless ''.respond_to?(:to_xliff)
    String.class_eval do
        # segmented is accepted but has no effect on the result.
        def to_xliff(segmented = true)
            encode(xml: :text)
        end
    end
end

# Choose the format entry from the trailing ".<word characters>" of the file name.
# ext is nil when the name has no such extension.
ext = file.to_s[/\.(\w+)$/, 1]
format_entry = Spongiae::Formats::ALL[ext]
if format_entry.nil?
    # Stop with a readable message instead of a NoMethodError on nil.
    if ext.nil?
        puts "Cannot detect the format of #{file.inspect}: no file extension"
    else
        puts "Unsupported file extension: .#{ext}"
    end
    exit 1
end

fmt = format_entry.sniff(file)
fmt.load!
# With a segmenter in use, tags are switched off for docx input.
if culter && ext =~ /docx/i
    formatOptions['tags'] = "false"
    puts "Temporarily inactivate tags because of segmentation"
end
reader = fmt.create(file, formatOptions)

require 'spongiae/xliff'

# Destination XLIFF file: --dest-file when given, otherwise derived from the
# source name as <name>.<ext>.xlf, or <name>-<tra-lang>.<ext>.xlf when a target
# language was given.
dest = options['--dest-file']
if dest.nil? || dest.empty?
    lang_suffix = options['--tra-lang'].nil? ? "" : "-#{options['--tra-lang']}"
    dest = file.gsub(/\.(\w+)$/) { "#{lang_suffix}.#{$1}.xlf" }
    # Without an extension the substitution leaves the name unchanged; never let
    # the default destination be the source file itself, which would be truncated.
    dest = "#{file}#{lang_suffix}.xlf" if dest == file
end

# File.exist? is used because File.exists? is no longer available in Ruby 3.2+.
if File.exist?(dest)
    # Translations of the existing file are read now, before it gets rewritten.
    puts "Overwrite #{dest}, keeping translations"
    translations_map = Spongiae::XLIFF::read_translations_map(dest)
    puts "#{translations_map.count} translations found"
else
    puts "Writing #{dest}"
    translations_map = {}   # empty, but must not be nil
end

# Write the XLIFF file: each unit produced by the reader is passed to the writer,
# which is closed once the reader is done.
File.open(dest, "w:UTF-8") do |out|
    xliff = Spongiae::XLIFF::XliffWriter.create(out, file, options, culter, translations_map)
    reader.read_unit do |unit|
        xliff.write_unit(unit)
    end
    xliff.close
end
