#!/usr/bin/perl

## Maps the Prague Dependency Treebank-like format (SGML)
## into an enriched Penn Treebank-like format (one sentence per line; form/lemma/tag tokens).
## To map the result into the plain Penn Treebank-like format, run pdt2wsj.prl.


## INPUT: STDIN  sgml file    
## OUTPUT: STDOUT Penn Treebank-like file



use strict;
use warnings;

## pdt_line_to_ptb($line)
##
## Converts one line of the SGML input into the text that has to be
## written to the output for that line, and returns it as a string:
##
##   * a line whose first non-blank characters are "<s" yields "\n"
##     (the newline is emitted BEFORE each sentence, so the overall
##     output starts with an empty line and is not newline-terminated);
##   * a line starting with a "<d...>" or "<f...>" element that also
##     carries "<l>" and "<t>" parts yields "form/lemma/tag " (note the
##     trailing space that separates tokens);
##   * any other line yields the empty string.
##
## The tag ends at the next "<", at any whitespace character, or at the
## end of the string.  Accepting end-of-string keeps the last token of
## an input that lacks a final newline; accepting any whitespace keeps a
## carriage return of CRLF input out of the emitted tag.
sub pdt_line_to_ptb {
    my ($line) = @_;
    my $out = '';

    # Elements are recognised only at the start of the line, so drop
    # any indentation first.
    $line =~ s/^\s+//;

    # Sentence boundary.
    $out .= "\n" if $line =~ /^<s/;

    # Token line: text after the opening element is the form, then the
    # lemma after <l>, then the tag after <t>.
    if ($line =~ m{^<[df].*?>(.*?)<l>(.*?)<t>(.*?)(?:[<\s]|\z)}) {
        $out .= "$1/$2/$3 ";
    }

    return $out;
}

## Main filter loop: STDIN (SGML) -> STDOUT (one sentence per line).
while (my $line = <STDIN>) {
    print pdt_line_to_ptb($line);
}

