#!/bin/tcsh -f
#
# run_parser -- run Collins' parser
#
# Usage: run_parser <input_dir> <output_dir>
#
# Every <input_dir>/*.csts file is preprocessed by three helper filters that
# live next to this script, the results are merged into one temporary file
# (each part preceded by an '<s id="FILENAME:...">' marker line naming its
# output file), the parser is run once on the merged file, and the parser
# output is handed to split_joined.pl.  Diagnostics of the filters and of the
# parser are collected in <output_dir>/<name of output_dir>.log.
#
# Exit status: 1 on a usage error, when no input file is found, or when the
# log contains an 'Assertion ... failed' line.
#

echo ${0:t}": start "`date`

if ($#argv != 2) then
	echo ${0:t} $*": usage: ${0:t} <input_dir> <output_dir>"
	exit 1
endif

set INDIR=$1
set OUTDIR=$2
set INSUFF=csts
set OUTSUFF=csts
set TMPSUFF=tmp
set LOG=$OUTDIR/${OUTDIR:t}.log
set TMPIN=$OUTDIR/${OUTDIR:t}.whole-in
set TMPOUT=$OUTDIR/${OUTDIR:t}.whole-out

# Directory holding this script and its helpers.  The :h modifier leaves a
# word without any '/' unchanged, so a bare script name (e.g. when started as
# "tcsh run_parser ...") has to be mapped to the current directory explicitly.
set SCRIPTDIR=${0:h}
if ("$SCRIPTDIR" == "$0") set SCRIPTDIR=.

setenv COLLINS_PATH $SCRIPTDIR
# NOTE(review): the helper comment below says "length $KEEPUPTO or more" while
# the log message says "over length $KEEPUPTO" -- confirm which one b8819.pl
# actually implements.
set KEEPUPTO=60

# for deleting MM[lt] tags:
set DEL=$SCRIPTDIR/z9800.x
# for ignoring sentences of length $KEEPUPTO or more:
set IGN=$SCRIPTDIR/b8819.pl
# for adding <r> to tokens:
set ADD_R=$SCRIPTDIR/c8801.pl

set PARSER=$COLLINS_PATH/exec/collins.pl
set SPLIT=$SCRIPTDIR/split_joined.pl

# Expand the input glob once, before anything is removed or created.  With
# nonomatch set an unmatched pattern is left as a literal word instead of
# aborting the script with "No match", so the problem can be reported clearly.
set nonomatch
set INFILES=($INDIR/*.$INSUFF)
unset nonomatch
if ( ! -e "$INFILES[1]" ) then
	echo ${0:t}": no input files matching $INDIR/*.$INSUFF"
	exit 1
endif

# Start from a clean slate: drop the log and merged files of a previous run.
'rm' -f $LOG
'rm' -f $TMPIN
'rm' -f $TMPOUT
mkdir -p $OUTDIR

echo "Processing the files $INDIR/*.$INSUFF"

# preprocessing

echo ${0:t}": Sentences over length $KEEPUPTO ignored" >>&! $LOG

# Remember exactly which temporary files this run creates, so that stale
# *.$TMPSUFF files left in $OUTDIR by an earlier run are never merged.
set TMPFILES=()
foreach f ($INFILES:q)
	echo "File ${f}:" >>&! $LOG
	set t=$OUTDIR/${f:t:r}.$TMPSUFF
	# The filter chain writes its result to $t; whatever the filters print
	# as diagnostics is appended to the log.
	($DEL MMl MMt < $f | \
	$IGN $KEEPUPTO | \
	$ADD_R > $t) >>&! $LOG
	set TMPFILES=($TMPFILES:q $t:q)
end

# parsing

echo "The files $OUTDIR/*.$TMPSUFF merged into $TMPIN" >>&! $LOG
foreach f ($TMPFILES:q)
	# Marker line carrying the name of the final output file for this part.
	echo '<s id="FILENAME:'$OUTDIR/${f:t:r}.$OUTSUFF'">' >>! $TMPIN
	cat $f >>! $TMPIN
	'rm' -f $f
end

$PARSER $TMPIN $TMPOUT >>&! $LOG
# A logged assertion failure is fatal; grep echoes the offending line(s).
# The merged files are intentionally not removed on this path.
if ( { grep 'Assertion .* failed' $LOG } ) exit 1

echo "The file $TMPOUT split into $OUTDIR/*.$OUTSUFF" >>&! $LOG
$SPLIT < $TMPOUT

'rm' -f $TMPIN
'rm' -f $TMPOUT
'rm' -f gmon.out

echo ${0:t}": done  "`date`
