#!/usr/bin/perl

#
# Fetch http://qa.perl.org/phalanx/100/, locate the second <table> on the
# page, and collect the text of the first cell of each row that follows the
# "the top 10" banner row (at most 10 entries).  The collected list is
# printed with Data::Dumper.
#
# Dies if the page cannot be fetched or has fewer than two tables; warns if
# the banner row is never found.

use strict;
use warnings;

use Carp;
use WWW::Mechanize;
use HTML::TreeBuilder;
use Data::Dumper;

#
# Assemble our Bot
# autocheck is requested explicitly so a failed fetch dies right away
# instead of handing an error page to the parser.
my $mech = WWW::Mechanize->new( autocheck => 1 );

#
# Retrieve the content
#------------------------------------------------------------------------#
$mech->get('http://qa.perl.org/phalanx/100/');

my $tree = HTML::TreeBuilder->new_from_content( $mech->content );
$tree->elementify();

#
# Find all our tables
my @tbls = $tree->find( 'table' );

#
# The table we're interested in is the second table
my $tbl = $tbls[1];

#
# Make sure that we have a second table!
# ($! is deliberately not reported: no system call failed here.)
croak "unable to find a second table\n"
	unless defined $tbl;

#
# Collection Variables
my @TopTen = ();     # text of the cells collected after the banner
my $grabText = 0;    # becomes true once the banner row has been seen

#
# Main Loop, go through all our TR's
foreach my $tr ( $tbl->find( 'tr' ) ) {

	#
	# Base Case, if we've grabbed 10, exit the loop
	last if @TopTen >= 10;

	#
	# The only data we're looking for is in the first td.
	# Rows without any td (e.g. header-only rows) yield undef; skip them
	# rather than calling a method on undef.
	my $cell = $tr->look_down('_tag', 'td');
	next unless defined $cell;

	if ($grabText) {
		#
		# grabText is set, grab the data.
		push @TopTen, $cell->as_text;
		next;
	}

	#
	# set grabText if the cell is class banner and the text is 'the top 10'
	# (a td may carry no class attribute at all, hence the defined check)
	my $class = $cell->attr('class');
	if ( defined $class
		&& $class eq 'banner'
		&& lc( $cell->as_text ) eq 'the top 10' )
	{
		$grabText = 1;
	}
}

#
# An empty result is most likely a page layout change; say so.
carp "never found the 'the top 10' banner row"
	unless $grabText;

#
# Only plain strings were kept, so the parse tree can be released now.
$tree->delete;

#
# use Data::Dumper to print the array
print Dumper( \@TopTen );


# syntax highlighted by Code2HTML, v. 0.9.1