#!/usr/bin/env perl
# validate_pml                   pajas@ufal.ms.mff.cuni.cz
#                                06 říj 2006

use warnings;
use strict;

$|=1;   # flush on write

use FindBin;
use File::Temp qw(tempfile tempdir);
use File::Basename;
use File::Spec;
use Pod::Usage;
use XML::LibXML;
use XML::LibXSLT;
use XML::LibXML::XPathContext;

use constant EMPTY => q{};

use Getopt::Long;
Getopt::Long::Configure ("bundling");
my %opts;
GetOptions(\%opts,
	'debug|D',
	'quiet|q',
	'help|h',
	'usage|u',
	'no-simplify|n',
	'pmldir|d=s',
	'path|p=s@',
	'man',
       ) or $opts{usage}=1;

my $DEBUG = $opts{debug};
sub _DEBUG { print STDERR @_ if $DEBUG }

if ($opts{usage}) {
  pod2usage(-msg => 'validate_pml');
}
if ($opts{help}) {
  pod2usage(-exitstatus => 0, -verbose => 1);
}
if ($opts{man}) {
  pod2usage(-exitstatus => 0, -verbose => 2);
}

$opts{pmldir} ||= $FindBin::RealBin;
@{$opts{path}} = map { split /:/,$_ } @{$opts{path}};

#our $tmpdir = tempdir( CLEANUP => 1 );
our $parser = XML::LibXML->new();
our $xpath = XML::LibXML::XPathContext->new;
our $xslt = XML::LibXSLT->new;
our $pml2rng = $xslt->parse_stylesheet( 
  $parser->parse_file( $opts{pmldir}.'/pml2rng.xsl' )
);

$xpath->registerNs('pml', 'http://ufal.mff.cuni.cz/pdt/pml/');

my $fail = 0;
for my $file (@ARGV) {
  print "processing $file\n" if $opts{debug};
  $fail = validate_pml( $file ) || $fail;
}
exit $fail;

our %rng_cache;
sub validate_pml {
  my $file = shift;
  my $doc = eval { $parser->parse_file($file) };
  if (!$doc or $@) {
    warn "$@\n";
    return 1;
  }
  my $schema_href = $xpath->findvalue('/*/pml:head[1]/pml:schema/@href',$doc);
  if (defined($schema_href) and $schema_href ne EMPTY) {
    unless ($schema_href=~m{^/|^http://}) {
      my $abs = File::Spec->rel2abs( $schema_href, dirname($file) );
      unless (exists($rng_cache{ $abs }) || -f $abs) {
	for my $dir (@{$opts{path}}) {
	  $abs = File::Spec->rel2abs( $schema_href, $dir );
	  if (exists($rng_cache{ $abs }) || -f $abs) {
	    $schema_href = $abs;
	    last;
	  }
	}
      }
      $schema_href = $abs;
      unless (-f $schema_href) {
	warn "$file: PML schema '$schema_href' not found\n";
	return 2;
      }
    }
  } elsif ($xpath->findvalue('count(/*/pml:head[1]/pml:schema)',$doc)) {
    $schema_href = $file;
  } else {
    warn "$file: instance declares no PML schema\n";
    return 2;
  }
  unless (exists $rng_cache{ $schema_href }) {
    my $simplify = File::Spec->rel2abs( 'pml_simplify', $FindBin::RealBin );
    
    my $simplified;
    {
      print STDERR "simplifying $schema_href\n" if $opts{debug};
      local $/;
      my $fh;
      if ($opts{'no-simplify'}) {
	open($fh, '<', $schema_href) ||
	       die "cannot open $schema_href: $!";
      } else {
	open($fh, '-|', $simplify,
	     (defined($opts{path}) ? map { ('-p', $_) } @{$opts{path}} : ()),
	     $schema_href) ||
	       die "cannot run pml_simplify: $!";
      }
      $parser->base_uri( $schema_href );
      $simplified = $parser->parse_fh( $fh );
      close $fh;
    }
    my $rng_dom = $pml2rng->transform( $simplified );
    $rng_dom->setBaseURI( $opts{pmldir}.'/'.basename($schema_href).'.rng' );
    $rng_cache{ $schema_href } = XML::LibXML::RelaxNG->new( DOM => $rng_dom );
  }
  eval { $rng_cache{ $schema_href }->validate( $doc ); };
  if ($@) {
    warn "$file: $@\n";
    print "$file: does not validate\n" unless $opts{quiet};
    return 3;
  }
  print "$file: validates\n" unless $opts{quiet};
  return 0;
}

__END__

=head1 NAME

validate_pml

=head1 SYNOPSIS
   
  validate_pml [--quiet|-q] [ --pmldir|d path-to-pml-tools ] 
	[--path|p resource-paths] [--no-simplify|-n] file [...]

Get help:

  validate_pml  -u|--usage          for usage (synopsis)
  validate_pml  -h|--help           for help
  validate_pml  --man               for the manual page

=head1 DESCRIPTION

validate_pml extracts path to PML schema from given PML instances,
locates the the schema, simplifies it via pml_simplify (unless
--no-simplify is used), converts it to Relax NG (using pml2rng.xsl
stylesheet), and uses the result to validate the PML instance.

Relax NG schemas are cached in memory in order to speed up validation
of multiple instances of the same PML schema.

=head1 OPTIONS

=over 4

=item B<--no-simplify>

Consider all PML schemas simplified.

=item B<--path|-p> dir

Try to locate missing schemas in a given directory. This option may be
repeated.

=item B<--pmldir|-d> dir

Path to auxiliary PML files: C<pml2rng.xsl>, C<pml_common.rng>, and
C<pml_internal.rng>.

=item B<--quiet|-q>

Be quiet (only fatal errors are reported). Exit status 0 indicates
that all files were valid.

=item B<--debug|-D>

Print debugging info.

=back

=head1 AUTHOR

Petr Pajas, E<lt>pajas@ufal.ms.mff.cuni.czE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by Petr Pajas

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=head1 BUGS

None reported... yet.

=cut
