1 #!/usr/bin/perl
2
3 use strict;
4
5 use Carp;
6 use WWW::Mechanize;
7 use HTML::TreeBuilder;
8 use Data::Dumper;
9
#
# Purpose: fetch the Phalanx 100 page, locate the second <table> in the
# document, and print (via Data::Dumper) the text of the first cell of each
# of the ten rows that follow the "The Top 10" banner row.

#
# Enable warnings from here on so undef comparisons and similar slips are
# reported instead of passing silently.
use warnings;

#
# Assemble our Bot
# autocheck is requested explicitly so a failed request dies with a useful
# message regardless of the installed WWW::Mechanize's default, rather than
# handing an error page to the parser.
my $mech = WWW::Mechanize->new( autocheck => 1 );

#
# Retrieve the content
#------------------------------------------------------------------------#
$mech->get('http://qa.perl.org/phalanx/100/');

my $tree = HTML::TreeBuilder->new_from_content( $mech->content );
$tree->elementify();

#
# Find all our tables
my @tbls = $tree->find('table');

#
# The table we're interested in is the second table
my $tbl = $tbls[1];

#
# Make sure that we have a second table!
# $! is not reported here: no system call failed, so it would only print
# an unrelated errno string.
croak('unable to find a second table')
    unless defined $tbl;

#
# Collection Variables
my @TopTen   = ();    # text of the first cell of each collected row
my $grabText = 0;     # becomes true once the banner row has been seen

#
# Main Loop, go through all our TR's
foreach my $tr ( $tbl->find('tr') ) {

    #
    # Base Case, if we've grabbed 10, exit the loop
    # (checked first so no lookup is done for a row we will not use)
    last if @TopTen >= 10;

    #
    # The only data we're looking for is in the first td
    my $cell = $tr->look_down( '_tag', 'td' );

    #
    # Rows without any td (e.g. header-only rows) have nothing to offer;
    # skip them instead of calling methods on undef.
    next unless defined $cell;

    if ($grabText) {

        #
        # grabText is set, grab the data.
        push @TopTen, $cell->as_text;
        next;
    }

    #
    # set grabText if the cell is class banner and the text is 'the top 10'
    # (the class attribute may be absent, hence the defined check)
    my $class = $cell->attr('class');
    if (   defined $class
        && $class eq 'banner'
        && lc( $cell->as_text ) eq 'the top 10' )
    {
        $grabText = 1;
    }
}

#
# use Data::Dumper to print the array
print Dumper( \@TopTen );

#
# Release the parse tree explicitly; HTML::Element trees may hold circular
# references that are not freed automatically.
$tree->delete;
# syntax highlighted by Code2HTML, v. 0.9.1