#!/usr/bin/perl

## Maps a Prague Dependency Treebank-like SGML file into a
## Penn Treebank-like tagged format (each token printed as form/tag).

## INPUT:  STDIN  - SGML file
## OUTPUT: STDOUT - Penn Treebank-like file


use strict;
use warnings;

## Convert one input line into the text that should be printed for it.
##
## Argument: a single line of input (leading whitespace is ignored).
## Returns:  a string that is the concatenation of
##   - "\n"         if the line starts with "<s" (any tag beginning with
##                  "<s"), so the tokens that follow land on a new line;
##   - "form/tag "  if the line starts with a "<f...>" or "<d...>" tag
##                  followed by FORM<l>LEMMA<t>TAG;
##   or the empty string when the line is neither.
sub _pdt_line_to_penn {
    my ($line) = @_;
    my $out = '';

    $line =~ s/^\s*//;    # markup may be indented

    $out .= "\n" if $line =~ /^<s/;

    if (
        $line =~ m{
            ^ < [df] .*? >        # opening tag starting with "<f" or "<d"
            (.*?)                 # $1: word form
            <l> .*?               # lemma: present in the input, not emitted
            <t> (.*?)             # $2: tag
            (?: [<\ \n] | \z )    # tag ends at next markup, a space, the
                                  # newline, or end of input (so a final
                                  # line without "\n" is not dropped)
        }x
      )
    {
        $out .= "$1/$2 ";
    }

    return $out;
}

## Main loop: stream STDIN line by line, printing the converted text.
while ( my $line = <STDIN> ) {
    print _pdt_line_to_penn($line);
}

