#! /usr/bin/env ruby 

# Collect every "--fmt:name=value" argument into formatOptions and remove it
# from ARGV in the same pass, so later argument handling never sees it.
# When a name is given several times, the last value wins.
fmt_arg = /^--fmt:([\w\-]+)=(.+)$/
formatOptions = {}
ARGV.reject! do |item|
    parsed = fmt_arg.match(item)
    formatOptions[parsed[1]] = parsed[2] if parsed
    parsed
end

formatOptions['tags'] = "false"     # statistics are always without tags

$LOAD_PATH << "#{File.dirname(__FILE__)}/../lib"                # For non-standard installation

require 'getoptlong'

# Parse the segmentation options. GetoptLong takes the options it handles out of
# ARGV, so only the positional arguments are left once the loop is done.
#   --seg / -s                   "simple", or the path of an .srx rules file
#   --src-lang / --srcLang / -o  source language passed to the SRX segmenter
options = {}
GetoptLong.new(
    [ '--seg', '-s', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--src-lang', '--srcLang', '-o', GetoptLong::OPTIONAL_ARGUMENT ],
).each { |key,val| options[key] = val }

# The input file is checked only after option parsing: counting ARGV before
# the options are removed would accept e.g. "--seg simple" with no file at all.
if ARGV.empty?
    puts "Syntax: #{__FILE__} [--fmt:option=value]{0,*} [--seg simple|<file.srx>] [--src-lang <language>] <native original>"
    exit 1      # a missing input file is a usage error
end
file = ARGV.shift

culter = nil
# Load the Culter library only when segmentation is requested, so that Culter
# is not mandatory to use Spongiae. Any other --seg value leaves culter nil.
seg = options['--seg']
src_lang = options['--src-lang']
case seg
when /^simple$/
    require 'culter/simple'
    culter = Culter::Simple.create(keeps_spaces: true)
when /\.srx$/
    require 'culter/srx'
    srx = Culter::SRX::SrxDocument.new(seg)
    # --src-lang takes an optional argument: when it is given without a value
    # GetoptLong reports an empty string, which must count as "not specified".
    if src_lang.nil? || src_lang.empty?
        culter = srx.segmenter('x-unknown')
        puts "SRX found, but missing source language, segmentation will be based on common rules"
    else
        culter = srx.segmenter(src_lang)
    end
end

require 'spongiae/formats/all'

# The format is looked up by file extension: the word characters that follow
# the final dot of the name. ext stays nil when the name has no such suffix.
ext = $1 if file =~ /\.(\w+)$/
format_family = Spongiae::Formats::ALL[ext]
# Stop with a readable message rather than a NoMethodError on nil.
# NOTE(review): assumes ALL yields nil for an extension it does not know - confirm.
abort "Unsupported or missing file extension: #{file.inspect}" if format_family.nil?

# sniff receives the file itself and returns the object used to build the reader.
fmt = format_family.sniff(file)
fmt.load!
reader = fmt.create(file,formatOptions)

require 'spongiae/stats'

# Feed the source text of every unit into a single statistics row. culter is
# passed along as-is and is nil when no segmentation was requested.
stats = Spongiae::Stats::SegmentsStatsRow.new
reader.read_unit { |unit| stats.add_unit(unit.srcText, culter) }

# Build the report line by line; the segment count is only reported when a
# segmenter was actually used.
summary = []
summary << "Units: #{stats.units}"
summary << "Segments: #{stats.segments}" unless culter.nil?
summary << "Words: #{stats.words}"
summary << "Characters: #{stats.chars}"
summary << "Non-blank characters: #{stats.nbChars}"
puts summary


