#!/usr/local/bin/perl

# Reads dependency structures (CSTS file format).
# Writes phrase structures.

print "\@N ord\n";
print "\@P lemma\n";
print "\@P tag\n";
print "\n";

$first_tree = 1;
while(<>)
{
    chomp $_;
    if(($_ =~ "^<[fd]( [^>]*)?>") || ($_ =~ "^<fadd( [^>]*)?>"))
    {
	# Add the node to the dependency tree.
	$sgml = $_;
	$gnode = pick($sgml, "g");
	$dnode = pick($sgml, "r");
	$dlemma = pick($sgml, "l");
	$dtag = pick($sgml, "t");
	encode($dlemma, $dtag);
	$deptree[$gnode] .= $dnode . ":" . $dlemma . ":" . $dtag . ",";
	$deptree[$dnode] = $dnode . ":" . $dlemma . ":" . $dtag . " --> "
	    . $deptree[$dnode];
    }
    elsif($_ =~ "^<s")
    {
	if($first_tree)
	{
	    $first_tree = 0;
	}
	else
	{
	    for($i = 0; $i<=$#deptree; $i++)
	    {
		dep2con($i);
	    }
	    # Print the constituent tree in FS format.
	    @fs_tree = @contree;
#	    dump_fs_tree();
	    print_fs_tree(0); print "\n";
	}
	# Erase the trees.
	$#deptree = -1;
	$#contree = -1;
	$deptree[0] = $_;
	$deptree[0] =~ s|<s id="?(.*)/.*/.*"?>|$1|;
	encode($deptree[0]);
	$deptree[0] =~ s/(.*)/0:$1:ZSB --> /;
    }
}

print "(1)\n";



###############################################################################
# SUBROUTINES
###############################################################################



#------------------------------------------------------------------------------
# Converts the dependency tree $deptree to the constituent tree $contree.
#------------------------------------------------------------------------------
sub dep2con
{
    # Take parameters from the stack.
    local($i_node, $nodeinfo, @nodeinfo, @children, $new_children, $new_nodeinfo, $i, $tag);
    $i_node = $_[0];
    $nodeinfo = $deptree[$i_node];
    @nodeinfo = split(" --> ", $nodeinfo);

    # Convert node representation to a better format. The attributes of each
    # node are at two places: at the beginning of the node info, AND in the
    # link from the node info of its parent. From now, the link will be only
    # the node index, and all other attributes will be stored in the node info
    # of the node they apply to.
    $nodeinfo[1] =~ s/(\d+):[^:,]*:[^:,]*,/$1,/g;
    @children = split(",", $nodeinfo[1]);

    # Browse the children and find the position of the parent in the sentence.
    $new_children = $nodeinfo[1];
    if($i_node > 0)
    {
	for($i = 0; $i<=$#children; $i++)
	{
	    if($children[$i] > $i_node)
	    {
		last;
	    }
	}
	# Insert a self link to the list of links to children. Instead of the
	# node index, insert the word "self" to distinguish from real links and
	# to prevent an endless recursion.
	if($i>0)
	{
	    $new_children =~ s/((\d+,){$i})((\d+,)*)/$1self,$3/;
	}
	else
	{
	    $new_children = "self,$new_children";
	}
    }

    # Modify the parent node.
    ($ord = $nodeinfo[0]) =~ s/([^:]*):[^:]*:[^:]*/$1/;
    ($lemma = $nodeinfo[0]) =~ s/[^:]*:([^:]*):[^:]*/$1/;
    ($tag = $nodeinfo[0]) =~ s/[^:]*:[^:]*:([^:]*)/$1/;
    decode($lemma, $tag);
    # Form a nonterminal from the POS tag of the headword.
    ($pos = $tag) =~ s/^(.).*/$1/;
    if($pos eq "N" || $pos eq "A" || $pos eq "C")
    {
	($nterm = $tag) =~ s/^(.)...(.).*/$1P$2/;
    }
    elsif($pos eq "P")
    {
	($nterm = $tag) =~ s/^.(.)..(.).*/Pron$1$2/;
    }
    elsif($pos eq "V")
    {
	$nterm = "VP";
    }
    elsif($pos eq "D")
    {
	$nterm = "AdvP";
    }
    elsif($pos eq "R")
    {
	($nterm = $tag) =~ s/^....(.).*/PP$1/;
    }
    elsif($ord == 0)
    {
	$nterm = "S";
    }
    else
    {
	($nterm = $tag) =~ s/^(..).*/$1/;
    }
    $lemma =~ s/_.*//;
    $nlterm = "$nterm($lemma)";
    encode($nlterm, $lemma, $tag);
    # Change the x-coordinate of the node in the tree view.
    if($#children>=0)
    {
	$xpos = middle($ord, $children[0], $children[$#children]);
    }
    else
    {
	$xpos = $ord;
    }
    # Combine the parent with the children.
    $new_nodeinfo = "$ord:$nlterm:$lemma:$tag:$xpos --> $new_children";
    $contree[$i_node] = $new_nodeinfo;
}



#------------------------------------------------------------------------------
# Finds minimum and maximum among the parameters and returns the middle.
#------------------------------------------------------------------------------
sub middle
{
    local($min, $max, $mid, $i);
    $min = $max = $_[0];
    for($i = 1; $i<=$#_; $i++)
    {
	if($_[$i]<$min)
	{
	    $min = $_[$i];
	}
	if($_[$i]>$max)
	{
	    $max = $_[$i];
	}
    }
    $mid = ($min+$max)/2;
    return $mid;
}



#------------------------------------------------------------------------------
# Picks word attribute by SGML tag.
#------------------------------------------------------------------------------
sub pick
{
    local($sgml, $element, $result);
    $sgml = $_[0];
    $element = $_[1];
    $result = $sgml;
    $result =~ s/.*<$element>([^<]*).*/$1/;
    return $result;
}



#------------------------------------------------------------------------------
# Hides special meaning of characters we use to separate internal data.
#------------------------------------------------------------------------------
sub encode
{
    local($i);
    for($i = 0; $i<=$#_; $i++)
    {
	$_[$i] =~ s/&/&amp;/g;
	$_[$i] =~ s/:/&colon;/g;
	$_[$i] =~ s/,/&comma;/g;
    }
}



#------------------------------------------------------------------------------
# Revitalizes escaped characters.
#------------------------------------------------------------------------------
sub decode
{
    local($i);
    for($i = 0; $i<=$#_; $i++)
    {
	$_[$i] =~ s/&amp;/&/g;
	$_[$i] =~ s/&colon;/:/g;
	$_[$i] =~ s/&comma;/,/g;
    }
}



#------------------------------------------------------------------------------
# Prints a tree in FS format.
#------------------------------------------------------------------------------
sub print_fs_tree
{
    local($i_node, $i, $children, @children, $node,
	  $ord, $nlterm, $lemma, $tag, $xpos);
    $i_node = $_[0];
    ($node, $children) = split(" --> ", $fs_tree[$i_node]);
    ($ord, $nlterm, $lemma, $tag, $xpos) = split(":", $node);
    decode($nlterm, $lemma, $tag);
    $nlterm =~ s/,/\\,/g;
    $lemma =~ s/,/\\,/g;
    $tag =~ s/,/\\,/g;
    $lemma =~ s/_.*//;
    print "[$xpos,$nlterm,NT]";
    @children = split(",", $children);
    if($#children>=0)
    {
	print "(";
	if($children[0] eq "self")
	{
	    print "[$i_node,$lemma,$tag]";
	}
	else
	{
	    print_fs_tree($children[0]);
	}
	for($i = 1; $i<=$#children; $i++)
	{
	    print ",";
	    if($children[$i] eq "self")
	    {
		print "[$i_node,$lemma,$tag]";
	    }
	    else
	    {
		print_fs_tree($children[$i]);
	    }
	}
	print ")";
    }
}



#------------------------------------------------------------------------------
# Prints node info for each node.
#------------------------------------------------------------------------------
sub dump_fs_tree
{
    for($i = 0; $i<=$#fs_tree; $i++)
    {
	print "$i: $fs_tree[$i]\n";
    }
    print "\n";
}

