#! /usr/bin/perl

=encoding utf-8
=head1 DESCRIPTION

This script imports a TMX file into an Elefas memory.

=head1 USAGE

    perl ef-import.pl (DBI-String or #alias) file.tmx <collection?:doc-name?>?

where DBI-String contains the connection parameters described in L<https://metacpan.org/pod/DBD::Pg>,
including user and password if necessary,
or only the name of the catalog (database).

Alternatively, you can create an alias in ../conf/elefas-config.pl
and use this alias preceded by '#'; for example, '#test' is an alias defined in the configuration file included in this archive.

Collection (the part of the last parameter before the ':') can be any name without spaces. If it does not exist, it will be created.
Doc-name (the part of the last parameter after the ':') is an optional replacement for the document name (defaults to the name of the file).

=cut

use XML::Parser;
use DBI;

# Turn the first command-line argument into a DBI connection.
# Accepted forms: a full "dbi:Pg:..." string, a list of DBD::Pg parameters
# ("dbname=...;host=..."), a bare database name, or "#alias" referring to an
# entry of $Elefas::Config{databases}.
my $usage = "Syntax: $0 DBI-String file.tmx <collection?:doc-name?>?";
my $db = shift or die $usage;
if ($db =~ s/^\#//) {
    # Keep the alias name: $db is overwritten by the lookup below, and the
    # error message must still be able to say which alias was not found.
    my $alias = $db;
    require File::Basename;
    # require() searches @INC for paths that do not start with ./ or ../,
    # so make paths relative to the current directory resolvable.
    push(@INC, '.');
    require sprintf('%s/../conf/elefas-config.pl', File::Basename::dirname($0));
    $db = $Elefas::Config{databases}{$alias}
        or die "No such alias: $alias";
}
# A first argument that looks like a file name (e.g. "file.tmx") and carries
# no key=value pair most likely means the database argument was forgotten.
die $usage if $db =~ /\.t\w+/ and $db !~ /=/;

$db = "dbname=$db" unless $db =~ /=/;
$db = "dbi:Pg:$db" unless $db =~ /^dbi:/i;

# Credentials are passed to DBI separately, so cut them out of the string.
my ($user, $pass);
$user = $1 if $db =~ s/\buser(?:name)?=(.+?)(?:;|$)//;
$pass = $1 if $db =~ s/\bpass(?:word)?=(.+?)(?:;|$)//;

# connect() returns undef on failure; stop here with the driver's message
# instead of failing later on the first method call.
my $conn = DBI->connect($db, $user, $pass)
    or die "Cannot connect to $db: $DBI::errstr";

# Insert statements executed by the XML handlers: one for each translation
# unit, one for each segment. Both hand back the new row id ("returning id").
my $sql_ins_unit = 'insert into ef_unit(tuid,doc_id) values(?,?) returning id';
my $sql_ins_seg = 'insert into ef_seg(unit,author,lang,contents)
	values(?,?,?,?)
	returning id';

my $st_ins_unit = $conn->prepare($sql_ins_unit);
my $st_ins_seg  = $conn->prepare($sql_ins_seg);

# Second argument: the TMX file to import.
my $file = shift or die "Syntax: $0 DBI-String file.tmx <collection?:doc-name?>?";
die "File must be in TMX format" unless $file =~ /\.tmx$/i;
# Check readability before touching the database, so that an unreadable file
# does not leave an empty EF_DOC row behind.
die "Cannot read file: $file" unless -r $file;

# Id of the EF_DOC row created for this import. It is a package global
# because handle_start() stores it with every translation unit.
our $docId;

# Optional third argument: "collection:doc-name", both parts optional.
# All values are passed as bind parameters, never interpolated into the SQL,
# so names containing quotes are stored verbatim.
if (my $collection = shift) {
    my $docName;
    # Limit 2: only the first ':' separates the parts, so a document name may
    # itself contain ':'.
    ($collection, $docName) = split(/:/, $collection, 2);
    $docName = $file unless defined $docName and length $docName;

    if (defined $collection and length $collection) {
        # Reuse the collection if it exists, create it otherwise.
        my ($collId) = $conn->selectrow_array(
            'select id from EF_COLLECTION where name=?', undef, $collection);
        ($collId) = $conn->selectrow_array(
            'insert into EF_COLLECTION(name) values(?) returning id',
            undef, $collection)
            unless $collId;

        ($docId) = $conn->selectrow_array(
            'insert into EF_DOC(name,collection) values (?,?) returning id',
            undef, $docName, $collId);
    }
    else {
        # ":doc-name" only renames the document, without any collection.
        ($docId) = $conn->selectrow_array(
            'insert into EF_DOC(name) values (?) returning id', undef, $docName);
    }
}
else {
    ($docId) = $conn->selectrow_array(
        'insert into EF_DOC(name) values (?) returning id', undef, $file);
}
defined $docId
    or die "Cannot create document entry: " . ($conn->errstr || 'no id returned');

# State shared with the XML handlers.
# The parenthesised form is required: "my $a, $b = 0;" declares only $a and
# leaves it undefined, which made the first generated tuid "tu-" instead of
# "tu-0".
my ($count_tu, $count_tuv) = (0, 0);    # inserted units / segments
my ($cur_lang, $cur_unit, $cur_author, $seg_txt);

# Stream the file through the handlers; rows are inserted while parsing.
my $parser = XML::Parser->new(Handlers => {
    Start => \&handle_start,
    End   => \&handle_end,
    Char  => \&handle_char,
});
$parser->parsefile($file);

print STDERR "$count_tu entries, $count_tuv segments imported\n";

# XML::Parser Start handler, called for every opening tag.
#   $el   - element name; each element is matched bare or with a "prefix:"
#   %attr - attributes of the element
# <tu>  : inserts a row into ef_unit and keeps its id in $cur_unit.
# <tuv> : records the language (first two characters of xml:lang / lang)
#         and the author (changeid, else creationid, else the <tu>'s author).
# <seg> : starts collecting character data into $seg_txt.
# Dies if the unit row cannot be inserted.
sub handle_start {
    my (undef, $el, %attr) = @_;
    # Author declared on the enclosing <tu>; default for each of its <tuv>s.
    our $tu_author;

    if (($el eq 'tu') or ($el =~ /:tu$/)) {
        # "0" is a valid tuid, so test for presence rather than truth.
        my $tuid = (defined $attr{tuid} and length $attr{tuid})
            ? $attr{tuid}
            : 'tu-' . ($count_tu || 0);
        my $res = $st_ins_unit->execute($tuid, $docId);
        my $row = $res ? $st_ins_unit->fetch() : undef;
        die "Cannot insert translation unit '$tuid': "
            . ($st_ins_unit->errstr || 'no id returned')
            unless $row;
        $count_tu += $res;
        $cur_lang = $seg_txt = undef;
        $cur_unit = $row->[0];
        $tu_author = $attr{'changeid'} || $attr{'creationid'};
        $cur_author = $tu_author;
    }
    if (($el eq 'tuv') or ($el =~ /:tuv$/)) {
        $cur_lang = $attr{'xml:lang'} || $attr{'lang'};
        $cur_lang = defined $cur_lang ? substr($cur_lang, 0, 2) : '';
        # Fall back to the <tu> author, not to whatever the previous sibling
        # <tuv> happened to set.
        $cur_author = $attr{'changeid'} || $attr{'creationid'} || $tu_author;
        $cur_author ||= ''; # not undef
    }
    if (($el eq 'seg') or ($el =~ /:seg$/)) {
        $seg_txt = '';
    }
}

# XML::Parser End handler, called for every closing tag.
# Only a closing <seg> (bare or with a "prefix:") matters: the text gathered
# since its opening tag is inserted into ef_seg for the current unit, author
# and language, the segment counter is increased by execute()'s result, and
# the buffer is closed again.
sub handle_end {
    my (undef, $tag) = @_;
    return unless $tag eq 'seg' or $tag =~ /:seg$/;

    my $inserted = $st_ins_seg->execute($cur_unit, $cur_author, $cur_lang, $seg_txt);
    $count_tuv += $inserted;
    $seg_txt = undef;
    return;
}

# XML::Parser Char handler, called for each run of character data.
# Text is appended to $seg_txt only while that buffer is defined;
# everything else is dropped.
sub handle_char {
    my (undef, $chunk) = @_;
    return unless defined $seg_txt;
    $seg_txt .= $chunk;
    return;
}

=head1 LICENSE

Copyright 2013 Silvestris Project (L<http://www.silvestris-lab.org/>)

Licensed under the EUPL, Version 1.1 or – as soon they will be approved by the European Commission - subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at: L<http://ec.europa.eu/idabc/eupl>

Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence. 

=cut


