#!/usr/bin/perl

# Changelog:
# Version 1.1 2001/08/23 hajic added correct handling of tags with attributes,
#                              log file reporting fixed, reporting changed 
#                              to include tag & line, other minor changes
#                              prog name back to Translate_Tags.pl
# Version 1.0 hladka(?)

## mapping Czech compact tags <---> Czech positional tags
## b2800a.f2o - mapping table Czech compact tags --> Czech positional tags
## b2800a.o2f - mapping table Czech positional tags --> Czech compact tags

## INPUT: STDIN csts sgml file
## OUTPUT: STDOUT sgml file
## parameter: mapping table    (e.g. b2800a.o2f, b2800a.f2o)
## Translate_Tags.log - log file

# Log file: tags that have no entry in the mapping table are reported here.
# It is created (or truncated) in the current working directory.  Abort early
# if that is impossible, otherwise those reports would be silently lost.
open LOG, '>', 'Translate_Tags.log'
  or die "Can't open log file Translate_Tags.log: $!\n";
print LOG "Translate_Tags.log", "\n";

# exactly one command-line argument (the mapping table) is required
unless (@ARGV == 1) {
  die "Usage: cat YourSGMLFile | Translate_Tags.pl",
      " DICTIONARY > TranslateSGMLFile", "\n";
}

# open mapping table file
# Three-argument open: the file name comes straight from the command line and
# must never be interpreted as an open mode or a pipe.
open DICT, '<', $ARGV[0] or die "Can't open file $ARGV[0]: $!";

# read in mapping table file
# Each line holds "SOURCE_TAG TARGET_TAG"; the result is %Dict, mapping
# source tag => target tag.
while (my $szEntry = <DICT>) {
  # split ' ' ignores leading whitespace and splits on any run of whitespace,
  # so tab-separated tables, repeated blanks and CRLF line ends all work.
  my ($szFrom, $szTo) = split ' ', $szEntry;
  # Blank or one-column lines define no mapping; such tags stay "not found".
  next unless defined $szTo;
  $Dict{$szFrom} = $szTo;
}
close DICT;

# translate tags
# Only lines starting with an <f ...> or <d ...> element are processed; every
# other line is copied to STDOUT unchanged.
$iLine = 0;   # number of the current input line, used in log messages
while (my $szLineIn = <STDIN>) {
  $iLine++;
  unless ($szLineIn =~ /^<[fd][ \t>]/) {
    print $szLineIn;
    next;
  }
  chomp $szLineIn;

  # Splitting on a captured pattern keeps the SGML elements in the result:
  # even indices hold the text between elements, odd indices the elements.
  my @rgTT = split /(<[^>]*>)/, $szLineIn;
  for (my $i = 1; $i <= $#rgTT; $i += 2) {
    # All SGML elements whose NAME ends in "t" (with or without attributes)
    # are supposed to be followed by a morphological tag.  The match is
    # anchored to the element name so that a "t" occurring inside an
    # attribute is not mistaken for such an element.
    next unless $rgTT[$i] =~ /^<[^ \t>]*t(?:[ \t][^>]*)?>\z/;

    # The element may be the last thing on the line, with no text after it.
    my $szOldTag = defined $rgTT[$i+1] ? $rgTT[$i+1] : "";
    my $szNewTag = $Dict{$szOldTag};
    if (!defined $szNewTag || $szNewTag eq "") {
      # Unknown tag: leave it untouched and report it with tag & line.
      print LOG "Tag not found: $szOldTag, line $iLine: $szLineIn", "\n";
    }
    else { # found!
      $rgTT[$i+1] = $szNewTag;
    } # of tag found in table
  } # of loop through split line
  print join("", @rgTT), "\n";
} # of while STDIN

print LOG "End of log", "\n";
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so report a failed close instead of ignoring it.  The translated
# output has already been written, hence only a warning.
close LOG or warn "Can't close Translate_Tags.log: $!\n";

0;

