#!/usr/bin/perl eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if 0; # not running under some shell # PODNAME: mech-dump # ABSTRACT: Dumps information about a web page use warnings; use strict; use WWW::Mechanize (); use Getopt::Long; use Pod::Usage; use HTTP::Cookies; my @actions; my $absolute; my $all; my $user; my $pass; my $agent; my $agent_alias; my $cookie_filename; GetOptions( 'user=s' => \$user, 'password=s' => \$pass, headers => sub { push( @actions, \&dump_headers ) }, forms => sub { push( @actions, \&dump_forms ) }, links => sub { push( @actions, \&dump_links ) }, images => sub { push( @actions, \&dump_images ) }, all => sub { $all++; push( @actions, \&dump_headers, \&dump_forms, \&dump_links, \&dump_images ) }, text => sub { push( @actions, \&dump_text ) }, absolute => \$absolute, 'agent=s' => \$agent, 'agent-alias=s' => \$agent_alias, 'cookie-file=s' => \$cookie_filename, help => sub { pod2usage(1); }, version => sub { print STDERR $WWW::Mechanize::VERSION, "\n"; exit 0; }, ) or pod2usage(2); my @uris = @ARGV or die "Must specify a URL or file to check. See --help for details.\n"; @actions = (\&dump_forms) unless @actions; binmode(STDOUT, ':utf8'); my $mech = WWW::Mechanize->new( autocheck => 0 ); if ( defined $agent ) { $mech->agent( $agent ); } elsif ( defined $agent_alias ) { $mech->agent_alias( $agent_alias ); } if ( defined $cookie_filename ) { my $cookies = HTTP::Cookies->new( file => $cookie_filename, autosave => 1, ignore_discard => 1 ); $cookies->load() ; $mech->cookie_jar($cookies); } else { $mech->cookie_jar(undef) ; } $mech->env_proxy(); foreach my $uri (@uris) { if ( -e $uri ) { require URI::file; $uri = URI::file->new_abs( $uri )->as_string; } my $response = $mech->get( $uri ); if (!$response->is_success and defined ($response->www_authenticate)) { if (!defined $user or !defined $pass) { die("Page requires username and password, but none specified.\n"); } $mech->credentials($user,$pass); $response = $mech->get( $uri ); $response->is_success or die "Can't fetch $uri with username and password\n", $response->status_line, "\n"; } $mech->is_html or die qq{$uri returns type "}, $mech->ct, qq{", not "text/html"\n}; foreach my $action (@actions ) { $action->( $mech ); print "\n" if @actions; } } sub dump_headers { my $mech = shift; print "--> Headers:\n" if $all; $mech->dump_headers( undef ); return; } sub dump_forms { my $mech = shift; print "--> Forms:\n" if $all; $mech->dump_forms( undef ); return; } sub dump_links { my $mech = shift; print "--> Links:\n" if $all; $mech->dump_links( undef, $absolute ); return; } sub dump_images { my $mech = shift; print "--> Images:\n" if $all; $mech->dump_images( undef, $absolute ); return; } sub dump_text { my $mech = shift; $mech->dump_text(); return; } __END__ =pod =encoding UTF-8 =head1 NAME mech-dump - Dumps information about a web page =head1 VERSION version 1.96 =head1 SYNOPSIS mech-dump [options] [file|url] Options: --headers Dump HTTP response headers --forms Dump table of forms (default action) --links Dump table of links --images Dump table of images --all Dump all four of the above, in that order --text Dumps the textual part of the web page --user=user Set the username --password=pass Set the password --cookie-file=filename Set the filename to use for persistent cookies --agent=agent Specify the UserAgent to pass --agent-alias=alias Specify the alias for the UserAgent to pass. Pick one of: * Windows IE 6 * Windows Mozilla * Mac Safari * Mac Mozilla * Linux Mozilla * Linux Konqueror --absolute Show URLs as absolute, even if relative in the page --help Show this message The order of the options specified is relevant. Repeated options get repeated dumps. Proxy settings are specified through the environment (e.g. C). See LWP::UserAgent for details. =head1 SEE ALSO L =head1 AUTHOR Andy Lester =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2004-2016 by Andy Lester. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut