A. Script Listings

Search in book...
Toggle Font Controls
Create new playlist

Name your new playlist

Playlist description (optional)
Sign In

Email address

Password

Forgot Password?

or

Continue with Facebook

Continue with Google
Sign Up

Full Name

Email address

Confirm Email Address

Password

or

Continue with Facebook

Continue with Google

Appendix A. Script Listings

Chapter 2

Please note that some of these scripts send queries to search engines. Take extra care before running those scripts in particular.

spiderviewer.php

<html>

<head>
<title>Search Engine Web Page Viewer</title>
</head>

<body>

<!-- The form submits back to this same page via GET; the "url" field is
     read by the PHP section below. -->
<form name="mainform" action="" method="get">
<table border="0" width="100%" align="center">
    <tr>
        <td>Enter URL: <br>
        <input type="text" name="url" size="20"></td>
    </tr>
    <tr>
        <td>
        <input type="submit" value="Click to See Search Engine View" name="submit">
        </td>
    </tr>
</table>
</form>
<hr>

<?php

// Only read the parameter when it is present: touching $_GET['url'] on the
// first page load (no query string) raises an undefined-index notice.
// Non-string input (e.g. ?url[]=x) is ignored because spiderViewer() expects
// a single URL string.
$myurl = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : null;

if (isset($myurl)) {
   print spiderViewer($myurl);
}

?>

</body>
</html>

<?php

/**
 * Builds a plain-text "search engine view" of a page as an HTML table.
 *
 * The page is fetched with get_content(), then stripped of script/object/
 * applet/style elements, all remaining tags, HTML entities, stop words
 * (read from stopwords.txt in the current directory, if readable) and any
 * character that is not a letter, digit, basic punctuation, newline or space.
 *
 * @param string $url URL entered by the user (untrusted).
 * @return string HTML fragment with the cleaned text, or a plain error
 *                message when the URL is empty or nothing could be fetched.
 */
function spiderViewer($url) {
    if (!$url) {
        return 'The url you entered was invalid.';
    }

    $originalHTML = get_content($url);
    if (!$originalHTML) {
        return 'Please check your URL.';
    }

    // Drop elements whose bodies are code or styling rather than page copy.
    // The closing-tag slash must be escaped because "/" is the delimiter.
    foreach (array('script', 'object', 'applet', 'style') as $tag) {
        $originalHTML = preg_replace(
            '/<' . $tag . '.*?>.*?<\/' . $tag . '.*?>/si', '', $originalHTML);
    }

    // Remove every remaining tag.
    $originalHTML = preg_replace('/<.*?>/s', '', $originalHTML);

    // Replace each entity with a space. The name is matched tightly so that
    // text sitting between two entities ("a&nbsp;b&nbsp;c") is not swallowed.
    $originalHTML = preg_replace('/&#?[a-z0-9]+;/i', ' ', $originalHTML);

    // Stop words may be separated by "<br />" or by line breaks.
    $stopWordsArray = array();
    if (is_readable('stopwords.txt')) {
        $stopWordsArray = preg_split('/<br \/>|\r\n|\r|\n/',
                                     file_get_contents('stopwords.txt'));
    }
    foreach ($stopWordsArray as $stopWord) {
        $stopWord = trim($stopWord);
        if ($stopWord === '') {
            continue; // an empty word would match between any two separators
        }
        // Look-arounds require a non-word character on both sides without
        // consuming it, so neighbouring words stay separated. preg_quote()
        // keeps punctuation in a stop word from being read as regex syntax.
        $originalHTML = preg_replace(
            '/(?<=\W)' . preg_quote($stopWord, '/') . '(?=\W)/si', '', $originalHTML);
    }

    // Keep only letters, digits, . ? ! ; , - newline and space. The hyphen is
    // placed last in the class so it is a literal, not a range operator.
    $originalHTML = preg_replace('/[^A-Za-z0-9.?!;,\n -]+/', '', $originalHTML);

    // Collapse runs of spaces/newlines into a single space.
    $originalHTML = preg_replace('/[\n ]{2,}/', ' ', $originalHTML);

    // The URL is user input echoed back into the page, so it must be escaped.
    $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

    $finalHTML  = '<table border="0" align="center" width="75%">';
    $finalHTML .= '<tr><td align="center" valign="top">';
    $finalHTML .= '<b>Search Engine View for URL:' . $safeUrl . '</b></td></tr>';
    $finalHTML .= '<tr><td align="left" valign="top">';
    $finalHTML .= $originalHTML . '</td></tr></table>';

    return $finalHTML;
}

/**
 * Fetches the body of a URL with cURL, following redirects.
 *
 * @param string $url URL to fetch (untrusted; comes from the query string).
 * @return string Response body, or an empty string when the request fails
 *                or the URL names a scheme other than http/https.
 */
function get_content($url)
{
   // The URL is user-supplied: refuse explicit non-web schemes such as
   // file:// or ftp:// so the viewer cannot be used to read local files.
   // Scheme-less input ("example.com") is still handed to cURL as before.
   if (preg_match('#^([a-z][a-z0-9+.\-]*)://#i', $url, $scheme)
       && !in_array(strtolower($scheme[1]), array('http', 'https'), true)) {
      return '';
   }

   $ch = curl_init();
   curl_setopt($ch, CURLOPT_URL, $url);
   curl_setopt($ch, CURLOPT_HEADER, 0);
   curl_setopt($ch, CURLOPT_FAILONERROR, 0);
   curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
   // Return the body from curl_exec() instead of capturing printed output.
   curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
   // Must be a single line: a line break inside a header value is invalid.
   curl_setopt($ch, CURLOPT_USERAGENT,
               'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)');
   curl_setopt($ch, CURLOPT_TIMEOUT, 30);
   if (stripos($url, 'https://') === 0) {
      // NOTE(review): certificate and host verification are switched off for
      // https URLs, exactly as in the original listing. This permits
      // man-in-the-middle tampering; re-enable if a CA bundle is available.
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
      curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
   }
   $string = curl_exec($ch);
   curl_close($ch);

   // curl_exec() yields false on failure; callers expect a string.
   return ($string === false) ? '' : $string;
}

Chapter 3

layout1.html

<html>
<head>
<style>
/* Two 800px-wide bands centred on the page: each is positioned at 50% and
   pulled back by half its width with a negative left margin. The margin
   must use the ASCII hyphen-minus; a typographic minus sign makes the
   declaration invalid and the browser drops it. */
#navigation {
position: absolute;
top: 10px;
left: 50%;
width: 800px;
margin-left: -400px;
text-align: left;
}

#content {
position: absolute;
top: 150px;
left: 50%;
width: 800px;
margin-left: -400px;
text-align: left;
}

body {
    text-align: center;
    min-width: 600px;
}
</style>
</head>
<body>

<!-- Content comes first in the markup even though it is displayed below
     the navigation band. -->
<div id="content">content<!-- SEO optimized content text goes here.--></div>

<div id="navigation">navigation<!-- navigational elements, ads go here--></div>

</body>
</html>

layout2.html

<html>
<head>

<style>
/* Side-by-side columns: a 200px navigation column followed by a 600px
   content column. Each "left" offset is reduced by the 400px negative
   margin, so navigation starts at 0px and content at 200px.
   Non-zero lengths need a unit, and the negative margin must use the ASCII
   hyphen-minus; otherwise the browser discards the declaration. */
#navigation {
position: absolute;
top: 0px;
left: 400px;
width: 200px;
margin-left: -400px;
text-align: left;
}

#content {
position: absolute;
top: 0px;
left: 600px;
width: 600px;
margin-left: -400px;
text-align: left;
}

body {
    text-align: center;
    min-width: 800px;
}
</style>
</head>
<body>

<!-- Content comes first in the markup even though it is displayed to the
     right of the navigation column. -->
<div id="content">
SEO optimized content text goes here.</div>

<div id="navigation">navigational elements, ads go here</div>

</body>
</html>

layout3.html

<html>
<head>
<style>

/* Four absolutely positioned panels anchored at the horizontal centre
   (left: 50%) and shifted with left margins. Negative margins must use the
   ASCII hyphen-minus; a typographic minus sign invalidates the declaration. */

#top {
position: absolute;
top: 10px;
left: 50%;
width: 800px;
margin-left: -400px;
text-align: left;
}

#left {
position: absolute;
top: 150px;
left: 50%;
width: 200px;
margin-left: -400px;
text-align: left;
}

/* NOTE(review): #main spans -200px..+400px from centre while #right spans
   0px..+200px, so the two overlap as written. Confirm the intended widths
   and offsets before relying on this layout. */
#main {
position: absolute;
top: 150px;
left: 50%;
width: 600px;
margin-left: -200px;
text-align: left;
}

#right {
position: absolute;
top: 150px;
left: 50%;
width: 200px;
margin-left: 0px;
text-align: left;
}

body {
    text-align: center;
    min-width: 800px;
}

</style>
</head>

<body>

<!-- The main body comes first in the markup; CSS places the panels visually. -->
<div id="main">optimized main body</div>

<div id="left">left panel</div>

<div id="top">top panel</div>

<div id="right">right panel</div>

</body>

</html>

Chapter 4

rankingfactors.pl

#!/usr/local/bin/perl
###########################################################
# File: rankingfactors.pl                                 #
# Description: This script performs analysis on several   #
#              ranking factors including:                 #
#  1) Keywords in Page Titles                             #
#  2) Keywords in Domain Names                            #
#  3) Keywords in Page Copy                               #
#  4) Keywords in Headings                                #
#  5) Keywords in Meta description                        #
#  6) Keyword Proximity                                   #
#  7) Keywords in Outbound Links                          #
#  8) Page Size                                           #
#  9) Words per Page                                      #
# 10) Website Size                                        #
# and more...                                             #
#                                                         #
# Format: perl rankingfactors.pl 10|100 keyword(s)        #
###########################################################

use LWP::Simple;
use LWP::UserAgent;
use HTML::TokeParser;
use HTML::TreeBuilder;
use File::Path;
use Math::Round qw(:all);

# Per-engine SERP data; declared at file scope so the analysis steps and the
# report subroutines share them.
my @googleLinks  = ( );
my @googleTitles = ( );
my @yahooLinks   = ( );
my @yahooTitles  = ( );
my @bingLinks    = ( );
my @bingTitles   = ( );

# Removes quote characters, then leading and trailing whitespace, from a raw
# key phrase and returns the cleaned phrase.
sub _normalizeKeyphrase {
   my ($phrase) = @_;
   $phrase = '' unless defined $phrase;
   $phrase =~ s/['"]//g;
   # \s is required here: a bare "s" would strip literal letters "s"
   # from both ends of the phrase instead of whitespace.
   $phrase =~ s/^\s+//;
   $phrase =~ s/\s+$//;
   return $phrase;
}

#build keyphrase/keyword if necessary
# Command line: <number of results> <keyword> [<keyword> ...]
my $numres    = $ARGV[0];
my $keyphrase = _normalizeKeyphrase(join ' ', @ARGV[1 .. $#ARGV]);


print "\nStarting..";
#cleanup temp files
rmtree( './serptemp', {keep_root => 1} );
print "\n..cleanup done";

#initialize variables
# Arrays are passed by reference (\@array). A bare @array flattens into the
# argument list, so the callee could neither tell the arrays apart nor store
# results in them.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.
initializeKeyVariables($keyphrase,     \@googleLinks,
                       \@googleTitles, \@yahooLinks,
                       \@yahooTitles,  \@bingLinks,
                       \@bingTitles);

#let's store all destination links found on SERPs
print "\n..getting SERPs";
getSERPResults($#googleLinks, \@googleLinks, "google");
getSERPResults($#yahooLinks,  \@yahooLinks,  "yahoo");
getSERPResults($#bingLinks,   \@bingLinks,   "bing");
print "\n..got the SERPs";

#-------------------TITLE Analysis-----------------------
# Every array below is handed over by reference (\@array): a bare @array
# flattens into the argument list, which shifts the trailing arguments and
# leaves the callee unable to fill the result array.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.

#get real titles
my @googleRealTitles = ( );
my @yahooRealTitles  = ( );
my @bingRealTitles   = ( );
getRealTitles($#googleLinks, \@googleRealTitles, "google");
getRealTitles($#yahooLinks,  \@yahooRealTitles,  "yahoo");
getRealTitles($#bingLinks,   \@bingRealTitles,   "bing");
print "\n..got the real titles";

#compare real titles with titles on SERPs
my @googleTitleComp = ( );
my @yahooTitleComp  = ( );
my @bingTitleComp   = ( );
my $percentMatchTitlesGoogle = compareArrays($#googleTitles, \@googleRealTitles,
                                             \@googleTitles, \@googleTitleComp);
my $percentMatchTitlesYahoo  = compareArrays($#yahooTitles, \@yahooRealTitles,
                                             \@yahooTitles, \@yahooTitleComp);
my $percentMatchTitlesBing   = compareArrays($#bingTitles, \@bingRealTitles,
                                             \@bingTitles, \@bingTitleComp);
print "\n..finished partial title comparisons";

#find keyword title matches
my @googleKeywordTitleMatch = ( );
my @yahooKeywordTitleMatch  = ( );
my @bingKeywordTitleMatch   = ( );
getKeywordsTitleMatch($keyphrase, \@googleRealTitles, $#googleRealTitles,
                      \@googleKeywordTitleMatch);
getKeywordsTitleMatch($keyphrase, \@yahooRealTitles, $#yahooRealTitles,
                      \@yahooKeywordTitleMatch);
getKeywordsTitleMatch($keyphrase, \@bingRealTitles, $#bingRealTitles,
                      \@bingKeywordTitleMatch);
print "\n..finished keyword title comparisons";

#find if keyword in title found in page copy
my @googleKeywordTitlePageCopy = ( );
my @yahooKeywordTitlePageCopy  = ( );
my @bingKeywordTitlePageCopy   = ( );
compareTitlePageCopy($#googleRealTitles, \@googleRealTitles,
                     \@googleKeywordTitlePageCopy, "google");
compareTitlePageCopy($#yahooRealTitles, \@yahooRealTitles,
                     \@yahooKeywordTitlePageCopy, "yahoo");
compareTitlePageCopy($#bingRealTitles, \@bingRealTitles,
                     \@bingKeywordTitlePageCopy, "bing");
print "\n..finished title page copy comparisons";


#-------------------Domain Name Analysis-----------------------
# Arrays are passed by reference (\@array) so the callee receives the link
# list and the result array as distinct arguments and can fill the latter.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.

#exact match
my @googleDomainKeywordExactMatch = ( );
my @yahooDomainKeywordExactMatch  = ( );
my @bingDomainKeywordExactMatch   = ( );
my $percentDomainKeywordExactMatchGoogle =
   keywordDomainExactMatch($keyphrase, \@googleLinks, $#googleLinks,
                           \@googleDomainKeywordExactMatch);
my $percentDomainKeywordExactMatchYahoo =
   keywordDomainExactMatch($keyphrase, \@yahooLinks, $#yahooLinks,
                           \@yahooDomainKeywordExactMatch);
my $percentDomainKeywordExactMatchBing =
   keywordDomainExactMatch($keyphrase, \@bingLinks, $#bingLinks,
                           \@bingDomainKeywordExactMatch);
print "\n..finished domain name exact keyword analysis";


#partial match
my @googleDomainKeywordPartialMatch = ( );
my @yahooDomainKeywordPartialMatch  = ( );
my @bingDomainKeywordPartialMatch   = ( );
# Declared with "my" like the exact-match percentages above.
my $percentDomainKeywordPartialMatchGoogle =
   keywordDomainPartialMatch($keyphrase, \@googleLinks, $#googleLinks,
                             \@googleDomainKeywordPartialMatch);
my $percentDomainKeywordPartialMatchYahoo =
   keywordDomainPartialMatch($keyphrase, \@yahooLinks, $#yahooLinks,
                             \@yahooDomainKeywordPartialMatch);
my $percentDomainKeywordPartialMatchBing =
   keywordDomainPartialMatch($keyphrase, \@bingLinks, $#bingLinks,
                             \@bingDomainKeywordPartialMatch);
print "\n..finished domain name partial keyword analysis";



#-------------------Page Copy Analysis----------------------------
# Result arrays are passed by reference (\@array); a bare, still-empty
# @array would flatten to nothing and the callee could not fill it.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.
my @googleKeywordDensity = ( );
my @yahooKeywordDensity  = ( );
my @bingKeywordDensity   = ( );

my $googleAvgDensity = keywordDensity($#googleLinks, $keyphrase,
                                      \@googleKeywordDensity, "google");
my $yahooAvgDensity  = keywordDensity($#yahooLinks, $keyphrase,
                                      \@yahooKeywordDensity, "yahoo");
my $bingAvgDensity   = keywordDensity($#bingLinks, $keyphrase,
                                      \@bingKeywordDensity, "bing");

#-------------------Description META Tag Analysis------------------
my @googleDescriptionMetaExact = ( );
my @yahooDescriptionMetaExact  = ( );
my @bingDescriptionMetaExact   = ( );

checkExactDescriptionMeta($#googleLinks, \@googleDescriptionMetaExact,
                          $keyphrase, "google");
checkExactDescriptionMeta($#yahooLinks, \@yahooDescriptionMetaExact,
                          $keyphrase, "yahoo");
checkExactDescriptionMeta($#bingLinks, \@bingDescriptionMetaExact,
                          $keyphrase, "bing");


my @googleDescriptionMetaPartial = ( );
my @yahooDescriptionMetaPartial  = ( );
my @bingDescriptionMetaPartial   = ( );

checkPartialDescriptionMeta($#googleLinks, \@googleDescriptionMetaPartial,
                            $keyphrase, "google");
checkPartialDescriptionMeta($#yahooLinks, \@yahooDescriptionMetaPartial,
                            $keyphrase, "yahoo");
checkPartialDescriptionMeta($#bingLinks, \@bingDescriptionMetaPartial,
                            $keyphrase, "bing");
print "\n..finished description META analysis";

#-------------------Header Tag Analysis----------------------------
# Result arrays are passed by reference (\@array) so each callee receives
# two distinct arrays it can fill, followed by the engine name and phrase.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.
my @googleNumberOfHeaderTags = ( );
my @yahooNumberOfHeaderTags  = ( );
my @bingNumberOfHeaderTags   = ( );
my @googleHeaderTagsKeywords = ( );
my @yahooHeaderTagsKeywords  = ( );
my @bingHeaderTagsKeywords   = ( );

checkHeaderTags($#googleLinks, \@googleNumberOfHeaderTags,
                \@googleHeaderTagsKeywords, "google", $keyphrase);
checkHeaderTags($#yahooLinks, \@yahooNumberOfHeaderTags,
                \@yahooHeaderTagsKeywords, "yahoo", $keyphrase);
checkHeaderTags($#bingLinks, \@bingNumberOfHeaderTags,
                \@bingHeaderTagsKeywords, "bing", $keyphrase);
print "\n..finished header tags analysis";


#-------------------Keyword Proximity Analysis---------------------
my @googleKeywordPositions = ( );
my @yahooKeywordPositions  = ( );
my @bingKeywordPositions   = ( );
my @googleKeywordPositionsList = ( );
my @yahooKeywordPositionsList  = ( );
my @bingKeywordPositionsList   = ( );
analyzeKeywordPositions($#googleLinks, \@googleKeywordPositions,
                        \@googleKeywordPositionsList, "google", $keyphrase);
analyzeKeywordPositions($#yahooLinks, \@yahooKeywordPositions,
                        \@yahooKeywordPositionsList, "yahoo", $keyphrase);
analyzeKeywordPositions($#bingLinks, \@bingKeywordPositions,
                        \@bingKeywordPositionsList, "bing", $keyphrase);
print "\n..finished keyword proximity analysis";


#-------------------Outbound Link Analysis--------------------------
# Arrays are passed by reference (\@array) so the callee receives the link
# list and the result array as distinct arguments and can fill the latter.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.
#
# The Yahoo arrays are named like their Google/Bing siblings (no stray "K"):
# createYahooHTMLReport reads @yahooOutboundLinkKeywords and
# @yahooOutboundLinksPR, so the misspelled names left those columns empty.
my @googleOutboundLinkKeywords = ( );
my @yahooOutboundLinkKeywords  = ( );
my @bingOutboundLinkKeywords   = ( );
outboundLinkKeywordAnalysis($#googleLinks, \@googleLinks,
                            \@googleOutboundLinkKeywords, "google", $keyphrase);
outboundLinkKeywordAnalysis($#yahooLinks, \@yahooLinks,
                            \@yahooOutboundLinkKeywords, "yahoo", $keyphrase);
outboundLinkKeywordAnalysis($#bingLinks, \@bingLinks,
                            \@bingOutboundLinkKeywords, "bing", $keyphrase);
print "\n..finished outbound links analysis";


#-------------------Outbound Link PR Analysis--------------------------
my @googleOutboundLinksPR = ( );
my @yahooOutboundLinksPR  = ( );
my @bingOutboundLinksPR   = ( );
outboundLinkPRAnalysis($#googleLinks, \@googleLinks,
                       \@googleOutboundLinksPR, "google", $keyphrase);
outboundLinkPRAnalysis($#yahooLinks, \@yahooLinks,
                       \@yahooOutboundLinksPR, "yahoo", $keyphrase);
outboundLinkPRAnalysis($#bingLinks, \@bingLinks,
                       \@bingOutboundLinksPR, "bing", $keyphrase);
print "\n..finished outbound link PR analysis";

#-------------------Average Page Size Analysis--------------------------
# Arrays are passed by reference (\@array); a bare, still-empty @array would
# flatten to nothing, shifting the engine name into its place and leaving
# the callee unable to fill the array.
# NOTE(review): the callees are defined outside this section; confirm they
# dereference these arguments.
my @googlePageSize = ( );
my @yahooPageSize  = ( );
my @bingPageSize   = ( );
my $googleAvgPageSize = averagePageSize($#googleLinks, \@googlePageSize, "google");
my $yahooAvgPageSize  = averagePageSize($#yahooLinks,  \@yahooPageSize,  "yahoo");
my $bingAvgPageSize   = averagePageSize($#bingLinks,   \@bingPageSize,   "bing");
print "\n..finished average page size analysis";


#-------------------Optimum Number of Words Analysis--------------------
my @googleWords = ( );
my @yahooWords  = ( );
my @bingWords   = ( );
my $googleWordsPerPage = optimumWordsPerPage($#googleLinks, \@googleWords, "google");
my $yahooWordsPerPage  = optimumWordsPerPage($#yahooLinks,  \@yahooWords,  "yahoo");
my $bingWordsPerPage   = optimumWordsPerPage($#bingLinks,   \@bingWords,   "bing");
print "\n..finished optimum number of words analysis";


#-------------------Website Size Analysis-------------------------------
my @googleResultsWebsiteSizes = ( );
my @yahooResultsWebsiteSizes  = ( );
my @bingResultsWebsiteSizes   = ( );
my $googleAverageWebSize = analyzeWebsiteSize($#googleLinks, \@googleLinks,
                                              \@googleResultsWebsiteSizes);
my $yahooAverageWebSize  = analyzeWebsiteSize($#yahooLinks, \@yahooLinks,
                                              \@yahooResultsWebsiteSizes);
my $bingAverageWebSize   = analyzeWebsiteSize($#bingLinks, \@bingLinks,
                                              \@bingResultsWebsiteSizes);
print "\n..finished website size analysis";

#-------------------Page Age Analysis-----------------------------------
my @googlePageAge = ( );
my @yahooPageAge  = ( );
my @bingPageAge   = ( );
pageAgeAnalysis($#googleLinks, \@googleLinks, \@googlePageAge);
pageAgeAnalysis($#yahooLinks,  \@yahooLinks,  \@yahooPageAge);
pageAgeAnalysis($#bingLinks,   \@bingLinks,   \@bingPageAge);



#-------------------Create HTML Report---------------------------------

#create index file
createIndexHTML($keyphrase);

# Row counts consumed by the report subroutines, whose loops run while
# $i < count. $#array is the LAST INDEX (count - 1), so using it here
# silently dropped the final search result from every report.
my $numberOfLinesGoogle = scalar @googleLinks;
my $numberOfLinesYahoo  = scalar @yahooLinks;
my $numberOfLinesBing   = scalar @bingLinks;

createGoogleHTMLReport();
createYahooHTMLReport();
createBingHTMLReport();


#---------------------------SUBROUTINES---------------------------
# Subroutine:
#   createGoogleHTMLReport
# Description:
#   This subroutine creates the google.html file, which summarizes the
#   Google SERP findings: a table of averages followed by a detail table
#   with one row per result. It reads the file-scoped Google result
#   variables and calls printIndividualKeywordProminenceMap once per row.
# Inputs:
#   None
# Outputs:
#   Creates ./report/google.html (dies if the file cannot be written)
# Returns:
#   Returns nothing
sub createGoogleHTMLReport {
   #create summary table first
   my $googleFile = "<html><head><title>Detailed Summary for Google</title>";
   $googleFile   .= "<style>";
   # Single-quoted: the CSS and attribute values contain double quotes, which
   # would otherwise terminate a double-quoted Perl string.
   $googleFile   .=
'body, td, tr{font-family: "Trebuchet ms", verdana, sans-serif; font-size:9px;}';
   $googleFile   .=
'b{font-family: "Trebuchet ms", verdana, sans-serif;font-size:10px;}';
   $googleFile   .= "</style>";
   $googleFile   .= "</head>";
   $googleFile   .= "<body><h1>Ranking Report Summary</h1>";
   $googleFile   .= "<br>";
   $googleFile   .=
'<table border="1" width="500" cellspacing="2" cellpadding="2">';
   $googleFile   .= "<tr><td colspan=2><b>Averages</b></td>";
   $googleFile   .= "</tr>";

   # One [label, formatted value] pair per summary row.
   my @summaryRows = (
      [ '% Title Match',                  sprintf("%.1f", $percentMatchTitlesGoogle) ],
      [ '% Keyword Domain Exact Match',   sprintf("%.1f", $percentDomainKeywordExactMatchGoogle) ],
      [ '% Keyword Domain Partial Match', sprintf("%.1f", $percentDomainKeywordPartialMatchGoogle) ],
      [ '% Keyword Density',              sprintf("%.1f", $googleAvgDensity) ],
      [ 'Page Size [bytes]',              sprintf("%.0f", $googleAvgPageSize) ],
      [ 'Words Per Page',                 sprintf("%.0f", $googleWordsPerPage) ],
      [ 'Website Size [of base url]',     round($googleAverageWebSize) ],
   );
   foreach my $row (@summaryRows) {
      my ($label, $value) = @$row;
      $googleFile .= "<tr><td><b>$label</b></td><td>$value</td></tr>";
   }
   $googleFile   .= "</table><br><br>";

   #detail table header
   $googleFile   .= "<b>Detail Table</b> <br>";
   $googleFile   .= "<table border=1 cellpadding=2 cellspacing=2>";
   $googleFile   .= "<tr>" . join('',
      "<td nowrap>#</td>",
      "<td width='100'><b>URL</b></td>",
      "<td nowrap width='150'><b>Google Title</b></td>",
      "<td nowrap width='150'><b>Page Title</b></td>",
      "<td nowrap><b>Keyword(s) found<br> in Title? [Y|N]</b></td>",
      "<td nowrap><b>Title Keywords <br>In Page Copy [%]</b></td>",
      "<td nowrap><b>Domain name <br>Exact Match</b></td>",
      "<td nowrap><b>Domain name <br>Partial Match</b></td>",
      "<td nowrap><b>Keyword Density</b></td>",
      "<td nowrap><b>META Description<br> Exact Match</b></td>",
      "<td nowrap><b>META Description<br> Partial Match</b></td>",
      "<td nowrap><b>Header Tags</b></td>",
      "<td nowrap><b>Header Tag <br>Keywords</b></td>",
      "<td nowrap width='350'><b>Keyword Positions in Page</b></td>",
      "<td nowrap><b>Keyword Prominence Map</b></td>",
      "<td nowrap><b>Outbound Links with Keywords</b></td>",
      "<td nowrap width='150'><b>Outbound Link<br> PRs</b></td>",
      "<td nowrap><b>Page Size <br>[bytes]</b></td>",
      "<td nowrap><b>Words in<br> Page</b></td>",
      "<td nowrap><b>Website Size</b></td>",
      "<td nowrap><b>Page Age</b></td>",
   ) . "</tr>";

   # NOTE(review): titles and URLs come from fetched pages and are inserted
   # without HTML escaping; confirm whether they may carry markup on purpose.
   my $cell = sub { return "<td align=left>$_[0]&nbsp;</td>"; };

   for (my $i = 0; $i < $numberOfLinesGoogle; $i++) {
      # Positions are stored "|"-separated. The pipe must be escaped: an
      # unescaped "|" is an empty alternation that matches between every
      # character.
      my $positions = $googleKeywordPositionsList[$i];
      $positions = '' unless defined $positions;
      $positions =~ s/\|/, /g;

      $googleFile .= "<tr>";
      $googleFile .= join('', map { $cell->($_) } (
         $i,
         $googleLinks[$i],
         $googleTitles[$i],
         $googleRealTitles[$i],
         $googleKeywordTitleMatch[$i],
         sprintf("%.1f", $googleKeywordTitlePageCopy[$i]),
         $googleDomainKeywordExactMatch[$i],
         $googleDomainKeywordPartialMatch[$i],
         sprintf("%.3f", $googleKeywordDensity[$i]),
         $googleDescriptionMetaExact[$i],
         $googleDescriptionMetaPartial[$i],
         $googleNumberOfHeaderTags[$i],
         $googleHeaderTagsKeywords[$i],
         $positions,
      ));
      $googleFile .=
"<td align=left><a href='./maps/google".$i.".html'>Map</a></td>";
      # Pass the positions array by reference so "google" stays the third
      # argument instead of being pushed behind the flattened elements.
      # NOTE(review): callee is defined outside this block; confirm it
      # dereferences its second argument.
      printIndividualKeywordProminenceMap($i, \@googleKeywordPositions, "google");
      $googleFile .= join('', map { $cell->($_) } (
         $googleOutboundLinkKeywords[$i],
         $googleOutboundLinksPR[$i],
         $googlePageSize[$i],
         $googleWords[$i],
         $googleResultsWebsiteSizes[$i],
         $googlePageAge[$i],
      ));
      $googleFile .= "</tr>";
   }
   # Close the elements opened above so the document is well formed.
   $googleFile .= "</table></body></html>";

   my $filename = "./report/google.html";
   open(my $fh, ">", $filename) or die "Cannot write $filename: $!";
   print {$fh} $googleFile;
   close($fh) or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   createYahooHTMLReport
# Description:
#   This subroutine creates the yahoo.html file, which summarizes the
#   Yahoo SERP findings: a table of averages followed by a detail table
#   with one row per result. It reads the file-scoped Yahoo result
#   variables and calls printIndividualKeywordProminenceMap once per row.
# Inputs:
#   None
# Outputs:
#   Creates ./report/yahoo.html (dies if the file cannot be written)
# Returns:
#   Returns nothing
sub createYahooHTMLReport {
   #create summary table first
   my $yahooFile = "<html><head><title>Detailed Summary for Yahoo</title>";
   $yahooFile   .= "<style>";
   # Single-quoted: the CSS and attribute values contain double quotes, which
   # would otherwise terminate a double-quoted Perl string.
   $yahooFile   .=
'body, td, tr{font-family: "Trebuchet ms", verdana, sans-serif; font-size:9px;}';
   $yahooFile   .=
'b{font-family: "Trebuchet ms", verdana, sans-serif;font-size:10px;}';
   $yahooFile   .= "</style>";
   $yahooFile   .= "</head>";
   $yahooFile   .= "<body><h1>Ranking Report Summary</h1>";
   $yahooFile   .= "<br>";
   $yahooFile   .=
'<table border="1" width="500" cellspacing="2" cellpadding="2">';
   $yahooFile   .= "<tr><td colspan=2><b>Averages</b></td>";
   $yahooFile   .= "</tr>";

   # One [label, formatted value] pair per summary row.
   my @summaryRows = (
      [ '% Title Match',                  sprintf("%.1f", $percentMatchTitlesYahoo) ],
      [ '% Keyword Domain Exact Match',   sprintf("%.1f", $percentDomainKeywordExactMatchYahoo) ],
      [ '% Keyword Domain Partial Match', sprintf("%.1f", $percentDomainKeywordPartialMatchYahoo) ],
      [ '% Keyword Density',              sprintf("%.1f", $yahooAvgDensity) ],
      [ 'Page Size [bytes]',              sprintf("%.0f", $yahooAvgPageSize) ],
      [ 'Words Per Page',                 sprintf("%.0f", $yahooWordsPerPage) ],
      [ 'Website Size [of base url]',     round($yahooAverageWebSize) ],
   );
   foreach my $row (@summaryRows) {
      my ($label, $value) = @$row;
      $yahooFile .= "<tr><td><b>$label</b></td><td>$value</td></tr>";
   }
   $yahooFile   .= "</table><br><br>";

   #detail table header
   $yahooFile   .= "<b>Detail Table</b> <br>";
   $yahooFile   .= "<table border=1 cellpadding=2 cellspacing=2>";
   $yahooFile   .= "<tr>" . join('',
      "<td nowrap>#</td>",
      "<td width='100'><b>URL</b></td>",
      "<td nowrap width='150'><b>Yahoo Title</b></td>",
      "<td nowrap width='150'><b>Page Title</b></td>",
      "<td nowrap><b>Keyword(s) found<br> in Title? [Y|N]</b></td>",
      "<td nowrap><b>Title Keywords <br>In Page Copy [%]</b></td>",
      "<td nowrap><b>Domain name <br>Exact Match</b></td>",
      "<td nowrap><b>Domain name <br>Partial Match</b></td>",
      "<td nowrap><b>Keyword Density</b></td>",
      "<td nowrap><b>META Description<br> Exact Match</b></td>",
      "<td nowrap><b>META Description<br> Partial Match</b></td>",
      "<td nowrap><b>Header Tags</b></td>",
      "<td nowrap><b>Header Tag <br>Keywords</b></td>",
      "<td nowrap width='350'><b>Keyword Positions in Page</b></td>",
      "<td nowrap><b>Keyword Prominence Map</b></td>",
      "<td nowrap><b>Outbound Links with Keywords</b></td>",
      "<td nowrap width='150'><b>Outbound Link<br> PRs</b></td>",
      "<td nowrap><b>Page Size <br>[bytes]</b></td>",
      "<td nowrap><b>Words in<br> Page</b></td>",
      "<td nowrap><b>Website Size</b></td>",
      "<td nowrap><b>Page Age</b></td>",
   ) . "</tr>";

   # NOTE(review): titles and URLs come from fetched pages and are inserted
   # without HTML escaping; confirm whether they may carry markup on purpose.
   my $cell = sub { return "<td align=left>$_[0]&nbsp;</td>"; };

   for (my $i = 0; $i < $numberOfLinesYahoo; $i++) {
      # Positions are stored "|"-separated. The pipe must be escaped: an
      # unescaped "|" is an empty alternation that matches between every
      # character.
      my $positions = $yahooKeywordPositionsList[$i];
      $positions = '' unless defined $positions;
      $positions =~ s/\|/, /g;

      $yahooFile .= "<tr>";
      $yahooFile .= join('', map { $cell->($_) } (
         $i,
         $yahooLinks[$i],
         $yahooTitles[$i],
         $yahooRealTitles[$i],
         $yahooKeywordTitleMatch[$i],
         sprintf("%.1f", $yahooKeywordTitlePageCopy[$i]),
         $yahooDomainKeywordExactMatch[$i],
         $yahooDomainKeywordPartialMatch[$i],
         sprintf("%.3f", $yahooKeywordDensity[$i]),
         $yahooDescriptionMetaExact[$i],
         $yahooDescriptionMetaPartial[$i],
         $yahooNumberOfHeaderTags[$i],
         $yahooHeaderTagsKeywords[$i],
         $positions,
      ));
      $yahooFile .=
"<td align=left><a href='./maps/yahoo".$i.".html'>Map</a></td>";
      # Pass the positions array by reference so "yahoo" stays the third
      # argument instead of being pushed behind the flattened elements.
      # NOTE(review): callee is defined outside this block; confirm it
      # dereferences its second argument.
      printIndividualKeywordProminenceMap($i, \@yahooKeywordPositions, "yahoo");
      $yahooFile .= join('', map { $cell->($_) } (
         $yahooOutboundLinkKeywords[$i],
         $yahooOutboundLinksPR[$i],
         $yahooPageSize[$i],
         $yahooWords[$i],
         $yahooResultsWebsiteSizes[$i],
         $yahooPageAge[$i],
      ));
      $yahooFile .= "</tr>";
   }
   # Close the elements opened above so the document is well formed.
   $yahooFile .= "</table></body></html>";

   my $filename = "./report/yahoo.html";
   open(my $fh, ">", $filename) or die "Cannot write $filename: $!";
   print {$fh} $yahooFile;
   close($fh) or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   createBingHTMLReport
# Description:
#   This subroutine creates bing.html file
#   which summarizes Bing SERP findings
# Inputs:
#   None
# Outputs:
#   Creates bing.html
# Returns:
#   Returns nothing
sub createBingHTMLReport {
   #create summary table first
   my $bingFile = "<html><head><title>Detailed Summary for Bing</title>";
   $bingFile   .= "<style>";
   $bingFile   .= 
"body, td, tr{font-family: "Trebuchet ms", verdana, sans-serif; font-size:9px;}";
   $bingFile   .= 
"b{font-family: "Trebuchet ms", verdana, sans-serif;font-size:10px;}";
   $bingFile   .= "</style>";
   $bingFile   .= "</head>";
   $bingFile   .= "<body><h1>Ranking Report Summary</h1>";
   $bingFile   .= "<br>";
   $bingFile   .= 
"<table border="1" width="500" cellspacing="2" cellpadding="2">";
   $bingFile   .= "<tr><td colspan=2><b>Averages</b></td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>% Title Match</b></td>";
   my $tmp = sprintf "%.1f", $percentMatchTitlesBing;
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>% Keyword Domain Exact Match</b></td>";
   $tmp = sprintf "%.1f", $percentDomainKeywordExactMatchBing;
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>% Keyword Domain Partial Match</b></td>";
   $tmp = sprintf "%.1f", $percentDomainKeywordPartialMatchBing;
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>% Keyword Density</b></td>";
   $tmp = sprintf "%.1f", $bingAvgDensity;
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>Page Size [bytes]</b></td>";
   $tmp = sprintf "%.0f", $bingAvgPageSize;
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>Words Per Page</b></td>";
   $tmp = sprintf "%.0f", $bingWordsPerPage;
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td><b>Website Size [of base url]</b></td>";
   $tmp = round($bingAverageWebSize);
   $bingFile   .= "<td>$tmp</td>";
   $bingFile   .= "</tr>";
   $bingFile   .= "</table><br><br>";
   $bingFile   .= "<b>Detail Table</b> <br>";
   $bingFile   .= "<table border=1 cellpadding=2 cellspacing=2>";
   $bingFile   .= "<tr>";
   $bingFile   .= "<td nowrap>#</td>";
   $bingFile   .= "<td width='100'><b>URL</b></td>";
   $bingFile   .= "<td nowrap width='150'><b>Bing Title</b></td>";
   $bingFile   .= "<td nowrap width='150'><b>Page Title</b></td>";
   $bingFile   .= "<td nowrap><b>Keyword(s) found<br> in Title? [Y|N]</b></td>";
   $bingFile   .= "<td nowrap><b>Title Keywords <br>In Page Copy [%]</b></td>";
   $bingFile   .= "<td nowrap><b>Domain name <br>Exact Match</b></td>";
   $bingFile   .= "<td nowrap><b>Domain name <br>Partial Match</b></td>";
   $bingFile   .= "<td nowrap><b>Keyword Density</b></td>";
   $bingFile   .= "<td nowrap><b>META Description<br> Exact Match</b></td>";
   $bingFile   .= "<td nowrap><b>META Description<br> Partial Match</b></td>";
   $bingFile   .= "<td nowrap><b>Header Tags</b></td>";
   $bingFile   .= "<td nowrap><b>Header Tag <br>Keywords</b></td>";
   $bingFile   .= "<td nowrap width='350'><b>Keyword Positions in Page</b></td>";
   $bingFile   .= "<td nowrap><b>Keyword Prominence Map</b></td>";
   $bingFile   .= "<td nowrap><b>Outbound Links with Keywords</b></td>";
   $bingFile   .= "<td nowrap width='150'><b>Outbound Link<br> PRs</b></td>";
   $bingFile   .= "<td nowrap><b>Page Size <br>[bytes]</b></td>";
   $bingFile   .= "<td nowrap><b>Words in<br> Page</b></td>";
   $bingFile   .= "<td nowrap><b>Website Size</b></td>";
   $bingFile   .= "<td nowrap><b>Page Age</b></td>";
   $bingFile   .= "</tr>";

   for (my $i=0; $i < $numberOfLinesBing; $i++) {
      $bingFile   .= "<tr>";
      $bingFile   .= "<td align=left>$i&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingLinks[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingTitles[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingRealTitles[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingKeywordTitleMatch[$i]&nbsp;</td>";
      $tmp = sprintf "%.1f", $bingKeywordTitlePageCopy[$i];

      $bingFile   .= "<td align=left>$tmp&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingDomainKeywordExactMatch[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingDomainKeywordPartialMatch[$i]&nbsp;</td>";
      $tmp = sprintf "%.3f", $bingKeywordDensity[$i];
      $bingFile   .= "<td align=left>$tmp&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingDescriptionMetaExact[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingDescriptionMetaPartial[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingNumberOfHeaderTags[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingHeaderTagsKeywords[$i]&nbsp;</td>";
      $tmp = $bingKeywordPositionsList[$i];
      $tmp =~ s/|/, /g;
      $bingFile   .= "<td align=left>$tmp&nbsp;</td>";
      $bingFile   .= "<td align=left><a href='./maps/bing".$i.".html'>Map</a></td>";
      printIndividualKeywordProminenceMap($i, @bingKeywordPositions, "bing");
      $bingFile   .= "<td align=left>$bingOutboundLinkKeywords[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingOutboundLinksPR[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingPageSize[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingWords[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingResultsWebsiteSizes[$i]&nbsp;</td>";
      $bingFile   .= "<td align=left>$bingPageAge[$i]&nbsp;</td>";
      $bingFile   .= "</tr>";
   }
   my $filename = "./report/bing.html";
   open FILE, ">", "$filename" or die $!;
   print FILE $bingFile;
   close FILE;
}


# Subroutine:
#   createIndexHTML
# Description:
#   This subroutine builds the HTML for the report index page, which
#   links to the Google, Yahoo! and Bing summary reports
# Inputs:
#   $keyword => keyword
# Outputs:
#   Creates index.html
# Returns:
#   Returns nothing
sub createIndexHTML {
   my $keyword = shift;
   $keyword = "" unless defined $keyword;

   # HTML-escape the keyword so characters such as & or < cannot break the page.
   my $safeKeyword = $keyword;
   $safeKeyword =~ s/&/&amp;/g;
   $safeKeyword =~ s/</&lt;/g;
   $safeKeyword =~ s/>/&gt;/g;
   $safeKeyword =~ s/"/&quot;/g;

   # qq{} is used wherever the markup itself contains double quotes, so no
   # backslash escaping is needed inside the string.
   my $indexFile = "<html><head><title>Ranking Report Summary</title></head>";
   $indexFile   .= "<body><center><strong>Ranking Report Summary";
   $indexFile   .= qq{ (for "$safeKeyword") <br><br>};

   # One link per search engine; each loads its report into the iframe below.
   my @links = (
      [ 'google.html', 'Google</a> |' ],
      [ 'yahoo.html',  'Yahoo!</a> |' ],
      [ 'bing.html',   'Bing Search</a><br><br>' ],
   );
   foreach my $link (@links) {
      my ($page, $label) = @$link;
      $indexFile .= qq{<a href="#" onclick="document.all.myiframe.src='$page'">};
      $indexFile .= $label;
   }

   $indexFile   .= "Click on Links to View Summary..<br><br>";
   $indexFile   .= qq{<iframe name="myiframe" width=5000 height=6000 border="0" frameborder="0">};
   $indexFile   .= "</iframe></center></body></html>";

   # Lexical filehandle: closed automatically on scope exit and not shared
   # with the other report writers in this script.
   my $filename = "./report/index.html";
   open(my $fh, ">", $filename) or die "Cannot open $filename for writing: $!";
   print {$fh} $indexFile;
   close($fh) or die "Cannot close $filename: $!";
}


# Subroutine:
#   pageAgeAnalysis
# Description:
#   This subroutine scrapes all URLs found on SERPs
#   looking for last modified string
# Inputs:
#   $numberOfElements => number of links to process
#   $srcArr => array (reference) of links to fetch
#   $destArr => array (reference) receiving the last-modified strings
# Outputs:
#   none
# Returns:
#   Returns nothing
sub pageAgeAnalysis {
   my ($numberOfElements, $srcArr, $destArr) =  @_;

   # One user agent is enough for every request; no need to rebuild it per URL.
   my $ua = LWP::UserAgent->new;
   $ua->agent("Mozilla/3.0 (compatible)");

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $request  = HTTP::Request->new("GET", "$srcArr->[$i]");
      my $response = $ua->request($request);

      # last_modified is undefined when the server sends no Last-Modified
      # header; localtime(undef) would then report 1 Jan 1970 as the page
      # age, so store an empty string instead.
      my $lastModified = $response->last_modified;
      $destArr->[$i] = defined $lastModified
                     ? scalar(localtime($lastModified))
                     : "";
   }
}


# Subroutine:
#   analyzeWebsiteSize
# Description:
#   This subroutine scrapes Google SERPs to pick up size of
#   different websites
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to links array
#   $srcArr => array (reference) to links array
# Outputs:
#   none
# Returns:
#   Returns average site size
sub analyzeWebsiteSize {
   my ($numberOfElements, $srcArr, $destArr) =  @_;

   my $ua = LWP::UserAgent->new;
   $ua->timeout(25);
   $ua->agent("Mozilla/3.0 (compatible)");

   # Every result page is downloaded to the same scratch file.
   my $filename = "./serptemp/temp.txt";
   my $total = 0;

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $url = defined $srcArr->[$i] ? $srcArr->[$i] : "";

      # Reduce an absolute URL to what sits between "//" and the first "/".
      if ($url =~ /^http/) {
         my @afterScheme = split(/\/\//, $url);
         my $rest        = defined $afterScheme[1] ? $afterScheme[1] : "";
         my @segments    = split(/\//, $rest);
         my $baseurl     = ($#segments > 0) ? $segments[0] : $rest;
         $baseurl =~ s/\/$//;
         $url = $baseurl;
      }

      # compose the "site:" query
      my $tmpurl =
         'http://www.google.com/search?hl=en&q=site%3A' . $url . '&btnG=Search';

      # random pause between queries
      sleep(int(rand(5)));

      # Drop the previous iteration's download first; otherwise a failed
      # fetch would leave the old page in place and its size would be
      # attributed to this URL.
      unlink $filename if -e $filename;
      my $res = $ua->get($tmpurl, ':content_file' => $filename);

      # get the google SERP page copy
      my $pageCopy = "";
      if ($res->is_success && -e $filename) {
         if (my $p = HTML::TokeParser->new($filename)) {
            while ($p->get_tag("body")) {
               $pageCopy = $p->get_text("/body");
            }
         }
      } else {
         print "\nfile does not exist";
      }

      # The result count is the number following "of about"; anything else
      # (no match, changed page layout) counts as 0.
      my $size = 0;
      if ($pageCopy =~ /of about\s+(\d[\d,]*)/) {
         $size = $1;
         $size =~ s/,//g;   # remove thousands separators
      }

      # store it for that URL
      $destArr->[$i] = $size;
      $total += $size;
   }

   # calculate and return the average ($total > 0 implies at least one element)
   return ($total > 0) ? ($total / $numberOfElements) : 0;
}


# Subroutine:
#   optimumWordsPerPage
# Description:
#   This subroutine loops through all files to record
#   page sizes in destination array.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to links array
#   $prefix => SE file prefix
# Outputs:
#   none
# Returns:
#   Returns average words per page size
sub optimumWordsPerPage {
   my ($numberOfElements, $destArr, $prefix) = @_;

   # Nothing to average over; also avoids a division by zero below.
   return 0 unless $numberOfElements && $numberOfElements > 0;

   my $total = 0;
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename  = './serptemp/' . $prefix . "$i.txt";
      my $wordCount = 0;

      if (-e $filename) {
         my $tree = HTML::TreeBuilder->new;
         $tree->parse_file($filename);

         # split ' ' splits on any run of whitespace and ignores leading
         # whitespace, so the list length is the real word count (the last
         # index, $#list, would be one too few).
         my @words = split(' ', $tree->as_text());
         $wordCount = scalar @words;

         # Release the parse tree explicitly before moving to the next file.
         $tree->delete;
      }

      $destArr->[$i] = $wordCount;
      $total += $wordCount;
   }
   return ($total / $numberOfElements);
}


# Subroutine:
#   averagePageSize
# Description:
#   This subroutine loops through all files to record
#   page sizes in destination array.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to links array
#   $prefix => SE file prefix
# Outputs:
#   none
# Returns:
#   Returns average page size
sub averagePageSize {
   my ($numberOfElements, $destArr, $prefix) = @_;

   # Nothing to average over; also avoids a division by zero below.
   return 0 unless $numberOfElements && $numberOfElements > 0;

   my $total = 0;
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";

      # -s yields undef for a missing file and an empty string for an empty
      # one; record both as 0 bytes so the sum stays numeric.
      my $filesize = (-s $filename) || 0;

      $destArr->[$i] = $filesize;
      $total += $filesize;
   }
   return ($total / $numberOfElements);
}


# Subroutine:
#   outboundLinkPRAnalysis
# Description:
#   This subroutine parses PR values from root domains
#   of all outbound links
# Inputs:
#   $numberOfElements => number of files to process
#   $srcLinksArr => array (reference) to links array
#   $prefix => SE file prefix
# Outputs:
#   none (the collected PR values are stored in the destination array)
# Returns:
#   No returns
sub outboundLinkPRAnalysis {
   my ($numberOfElements, $srcLinksArr, $destArr, $prefix) = @_;
   my $PRURL = 'http://www.seowarrior.net/scripts/pr.php?pr=';
   my $range = 2;

   # Key used to tell internal from external links: the first two
   # dot-separated labels of whatever follows "//", concatenated
   # (e.g. "http://www.example.com/a" gives "wwwexample").
   my $siteKey = sub {
      my ($link) = @_;
      my @afterScheme = split(/\/\//, defined $link ? $link : "");
      my @labels = split(/\./, defined $afterScheme[1] ? $afterScheme[1] : "");
      return join("", map { defined $_ ? $_ : "" } @labels[0, 1]);
   };

   #loop through each file
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my %linkHash = ();   # base URLs already looked up for this page
      my $PRs = "";

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p) {
         # The page's own key does not change per anchor; compute it once.
         my $parentKey = $siteKey->($srcLinksArr->[$i]);

         while (my $token = $p->get_tag("a")) {
            my $url = $token->[1]{href} || "-";
            # Read past the anchor text, as the original code did.
            $p->get_trimmed_text("/a");

            # only absolute links can be external
            next unless $url =~ /^http/;
            next if $siteKey->($url) eq $parentKey;

            # base URL = what sits between "//" and the first "/"
            my @afterScheme = split(/\/\//, $url);
            my $rest        = defined $afterScheme[1] ? $afterScheme[1] : "";
            my @segments    = split(/\//, $rest);
            my $baseurl     = ($#segments > 0) ? $segments[0] : $rest;
            $baseurl =~ s/\/$//;

            # look each external base URL up only once per page
            next if exists $linkHash{$baseurl};

            #obtain PR value / use random sleep
            sleep(int(rand($range)));

            my $PR = get($PRURL . $baseurl);
            $PR = "" unless defined $PR;   # get() returns undef when the fetch fails
            $PR =~ s/\n//g;
            $PRs .= $PR . "|";
            $linkHash{$baseurl} = 1;
         }
      }
      $destArr->[$i] = $PRs;
   }
}


# Subroutine:
#   outboundLinkKeywordAnalysis
# Description:
#   This subroutine analyzes keywords in outbound links
# Inputs:
#   $numberOfElements => number of files to process
#   $srcLinksArr => array (reference) to links array
#   $prefix => SE file prefix
#   $keyword => keyword
# Outputs:
#   prints the keyword map
# Returns:
#   No returns
sub outboundLinkKeywordAnalysis {
   my ($numberOfElements, $srcLinksArr, $destArr, $prefix, $keyword) = @_;
   $keyword = "" unless defined $keyword;

   # A multi-word keyword is matched fragment by fragment; a single-word
   # keyword is matched as a whole. Empty strings are dropped because an
   # empty regex does not mean "match nothing" in Perl.
   my @keywordFragments = split(/ /, $keyword);
   my @needles = grep { length $_ }
                 ($#keywordFragments > 0 ? @keywordFragments : ($keyword));

   # Key used to tell internal from external links: the first two
   # dot-separated labels of whatever follows "//", concatenated.
   my $siteKey = sub {
      my ($link) = @_;
      my @afterScheme = split(/\/\//, defined $link ? $link : "");
      my @labels = split(/\./, defined $afterScheme[1] ? $afterScheme[1] : "");
      return join("", map { defined $_ ? $_ : "" } @labels[0, 1]);
   };

   #loop through each file
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename   = './serptemp/' . $prefix . "$i.txt";
      my $foundCount = 0;

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p) {
         my $parentKey = $siteKey->($srcLinksArr->[$i]);

         while (my $token = $p->get_tag("a")) {
            #get link and anchor text
            my $url  = $token->[1]{href} || "-";
            my $text = $p->get_trimmed_text("/a");
            $text =~ s/["']//g;   # strip every quote, not just the first

            # only absolute links pointing at another site are of interest
            next unless $url =~ /^http/;
            next if $siteKey->($url) eq $parentKey;

            # count the anchor once if any keyword (fragment) occurs in it;
            # \Q...\E makes the keyword match literally
            foreach my $needle (@needles) {
               if ($text =~ /\Q$needle\E/i) {
                  $foundCount++;
                  last;
               }
            }
         }
      }

      # number of external anchors containing the keyword (0 when no file)
      $destArr->[$i] = $foundCount;
   }
}


# Subroutine:
#   printKeywordProminenceMap
# Description:
#   This subroutine prints each URL map
# Inputs:
#   $numberOfElements => number of files to process
#   $srcArr => array (reference) to result array
# Outputs:
#   prints the keyword map
# Returns:
#   No returns
sub printKeywordProminenceMap {
   my ($srcArr, $numberOfElements) = @_;

   # Print one map per line. The loop index must be $i: the previously used
   # $index was never defined, so element 0 was printed every time.
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $map = defined $srcArr->[$i] ? $srcArr->[$i] : "";
      print "$map\n";
   }
}


# Subroutine:
#   printIndividualKeywordProminenceMap
# Description:
#   This subroutine prints each URL map
# Inputs:
#   $index => index of the URL whose map is written
#   $srcArr => array (reference) to result array
#   $prefix => SE file prefix used in the output file name
# Outputs:
#   prints the keyword map
# Returns:
#   No returns
sub printIndividualKeywordProminenceMap {
   my ($index, $srcArr, $prefix) = @_;
   my $filename = "./report/maps/$prefix" . $index . ".html";
   my $map = defined $srcArr->[$index] ? $srcArr->[$index] : "";

   # Lexical filehandle so this writer cannot clash with the other report
   # writers that open a handle of their own.
   open(my $fh, ">", $filename) or die "Cannot open $filename for writing: $!";
   print {$fh} "<html><head><title>\n";
   print {$fh} "Keyword Prominence Map\n";
   print {$fh} "</title></head>\n";
   # "cellpadding" was misspelled "cellpading", which browsers ignore.
   print {$fh} "<body><table width=400 cellpadding=2 cellspacing=0><tr><td width=400>";
   print {$fh} $map;
   print {$fh} "</td></tr></table></body></html>";
   close($fh) or die "Cannot close $filename: $!";
}


# Subroutine:
#   analyzeKeywordPositions
# Description:
#   This subroutine analyzes relative positions of keywords within a page copy
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on arrays


sub analyzeKeywordPositions {
   my ($numberOfElements, $destArr, $destArr2, $prefix, $keyword) = @_;
   $keyword = "" unless defined $keyword;
   my @keywordFragments = split(/ /, $keyword);
   my $multiKeyword = ($#keywordFragments > 0);

   #loop through each file
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $pageCopy = "";
      my $filename = './serptemp/' . $prefix . "$i.txt";

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      unless ($p) {
         print "\nfile does not exist";
         next;
      }

      #get pageCopy for this file
      while ($p->get_tag("body")) {
         $pageCopy = cleanText($p->get_trimmed_text("/body"));
      }

      # collapse every whitespace run so split(/ /) yields one word per field
      $pageCopy =~ s/\s+/ /g;
      my @words = split(/ /, $pageCopy);

      # Visit every word, including the last one (bounding the loop by the
      # last index with "<" skipped it).
      for (my $j = 0; $j < @words; $j++) {
         my $matched = 0;

         if ($multiKeyword) {
            # record the index of the first keyword fragment found in the word
            for (my $k = 0; $k <= $#keywordFragments; $k++) {
               next unless length $keywordFragments[$k];
               if ($words[$j] =~ /\Q$keywordFragments[$k]\E/i) {
                  $destArr->[$i]  .= "$k ";    # which fragment matched
                  $destArr2->[$i] .= "$j|";    # word position of the match
                  $matched = 1;
                  last;
               }
            }
            $destArr->[$i] .= "* " unless $matched;
         } else {
            #handle single keyword
            $words[$j] =~ s/["']//g;
            if (length $keyword && $words[$j] =~ /\Q$keyword\E/i) {
               $destArr->[$i]  .= "0 ";
               $destArr2->[$i] .= "$j|";
               $matched = 1;
            } else {
               $destArr->[$i] .= "* ";
            }
         }

         # NOTE(review): a matched word is followed by an extra "* " in the
         # map. Kept exactly as the original emitted it; confirm whether the
         # map renderer relies on this.
         $destArr->[$i] .= "* " if $matched;
      }
   }
}


# Subroutine:
#   checkHeaderTags
# Description:
#   This subroutine checks use of heading tags in addition to checking
#   for keyword use in the same tags.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr1 => array (reference) to result array
#   $destArr2 => array (reference) to result array
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on arrays
sub checkHeaderTags {
   my ($numberOfElements, $destArr1, $destArr2, $prefix, $keyword) = @_;

   # The keyword split on spaces; a single-word keyword gives one fragment.
   # Empty fragments are dropped because an empty regex does not mean
   # "match nothing" in Perl.
   my @needles = grep { length $_ } split(/ /, defined $keyword ? $keyword : "");
   my @levels  = map { "h$_" } 1 .. 6;

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;

      unless ($p) {
         # no file => insert defaults, in the same six-field layout as below
         $destArr1->[$i] = join("|", (0) x 6);
         $destArr2->[$i] = join("|", ("N") x 6);
         next;
      }

      # Collect the text of every heading per level. Keeping a list per
      # level avoids joining with a separator string and splitting it back:
      # split() treats its separator as a regex, so a "|"-based separator
      # became an alternation that split between characters.
      my %headings = map { $_ => [] } @levels;
      while (my $token = $p->get_token) {
         next unless $token->[0] eq 'S' && exists $headings{ $token->[1] };
         my $tag = $token->[1];
         push @{ $headings{$tag} }, cleanText($p->get_text("/$tag"));
      }

      my (@counts, @flags);
      foreach my $tag (@levels) {
         my @texts = map { defined $_ ? $_ : "" } @{ $headings{$tag} };
         push @counts, scalar @texts;

         # The heading text is searched for the keyword, not the other way
         # round; \Q...\E makes the keyword match literally.
         my $allText = join(" ", @texts);
         my $hits = grep { $allText =~ /\Q$_\E/i } @needles;
         push @flags, ($hits ? "Y" : "N");
      }

      $destArr1->[$i] = join("|", @counts);   # headings per level, h1..h6
      $destArr2->[$i] = join("|", @flags);    # keyword present per level, h1..h6
   }
}


# Subroutine:
#   checkExactDescriptionMeta
# Description:
#   This subroutine checks for exact keyword match in keyword description.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on array
sub checkExactDescriptionMeta {
   my ($numberOfElements, $destArr, $keyword, $prefix) = @_;
   $keyword = "" unless defined $keyword;

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $result = "N";   # default when the file, tag or keyword is missing

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p && length $keyword) {
         while (my $token = $p->get_tag("meta")) {
            # many meta tags have no name attribute (e.g. http-equiv)
            my $name = $token->[1]{name};
            next unless defined $name && $name =~ /description/i;

            my $metaDescription = $token->[1]{content};
            next unless defined $metaDescription;
            $metaDescription =~ s/["']//g;   # strip every quote, not just the first

            # \Q...\E matches the keyword literally; stop at the first hit
            # so a later description tag cannot turn the result back to "N".
            if ($metaDescription =~ /\Q$keyword\E/i) {
               $result = "Y";
               last;
            }
         }
      }
      $destArr->[$i] = $result;
   }
}


# Subroutine:
#   checkPartialDescriptionMeta
# Description:
#   This subroutine checks whether any word of the keyword occurs in the META description.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on array
sub checkPartialDescriptionMeta {
   my ($numberOfElements, $destArr, $keyword, $prefix) = @_;
   $keyword = "" unless defined $keyword;

   # Multi-word keywords match when any single word matches; a single-word
   # keyword is matched as a whole. Empty strings are dropped because an
   # empty regex does not mean "match nothing" in Perl.
   my @keywordFragments = split(/ /, $keyword);
   my @needles = grep { length $_ }
                 ($#keywordFragments > 0 ? @keywordFragments : ($keyword));

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $result = "N";   # default when the file, tag or keyword is missing

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p && @needles) {
         META: while (my $token = $p->get_tag("meta")) {
            # many meta tags have no name attribute (e.g. http-equiv)
            my $name = $token->[1]{name};
            next unless defined $name && $name =~ /description/i;

            my $metaDescription = $token->[1]{content};
            next unless defined $metaDescription;

            foreach my $needle (@needles) {
               if ($metaDescription =~ /\Q$needle\E/i) {
                  $result = "Y";
                  # leave the tag loop too, so a later description tag
                  # cannot turn the result back to "N"
                  last META;
               }
            }
         }
      }
      $destArr->[$i] = $result;
   }
}


# Subroutine:
#   keywordDensity
# Description:
#   This subroutine calculates keyword density for given keyword.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   Returns the average keyword density across the processed files

sub keywordDensity {
   my ($numberOfElements, $keyword, $destArr, $prefix) = @_;

   # Nothing to average over; also avoids a division by zero below.
   return 0 unless $numberOfElements && $numberOfElements > 0;

   my $total = 0;

   #loop through all files
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $pageCopy = "";
      my $filename = './serptemp/' . $prefix . "$i.txt";

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p) {
         while ($p->get_tag("body")) {
            $pageCopy = $p->get_trimmed_text("/body");
         }
      } else {
         print "\nFile not found when calculating keyword density.";
      }

      # quotes are removed before the copy is handed to calculateKD
      $pageCopy =~ s/["']//g;

      # calculateKD stores the per-page density in $destArr->[$i] and
      # returns it
      $total += calculateKD($i, $pageCopy, $destArr, $keyword);
   }
   return ($total / $numberOfElements);
}


# Subroutine:
#   calculateKD
# Description:
#   Helper subroutine to calculate keyword density
# Inputs:
#   $index => index of the result array element to fill
#   $pageCopy => page copy text
#   $destArr => array (reference) to result array
#   $keyword => keyword to analyze
# Outputs:
#   No outputs produced
# Returns:
#   Returns the density value stored in $destArr->[$index]
sub calculateKD {
   my ($index, $pageCopy, $destArr, $keyword) = @_;
   $pageCopy = "" unless defined $pageCopy;
   $keyword  = "" unless defined $keyword;

   # split ' ' splits on any whitespace run and ignores leading whitespace,
   # so the list length is the real word count.
   my @words = split(' ', $pageCopy);

   # A multi-word keyword contributes the summed density of its words; a
   # single-word keyword is counted as a whole. Empty strings are dropped.
   my @keywordFragments = split(/ /, $keyword);
   my @needles = grep { length $_ }
                 ($#keywordFragments > 0 ? @keywordFragments : ($keyword));

   # Start from zero every time so stale values in $destArr are not added in.
   my $density = 0;
   if (@words) {
      foreach my $needle (@needles) {
         # Count occurrences directly: counting split() pieces missed a
         # keyword at the end of the text (trailing empty fields are
         # dropped) and was case-sensitive, unlike the other keyword checks.
         my $hits = () = $pageCopy =~ /\Q$needle\E/gi;
         $density += ($hits * 100) / scalar(@words);
      }
   }

   $destArr->[$index] = $density;
   return $density;
}


# Subroutine:
#   keywordDomainExactMatch
# Description:
#   This subroutine analyzes keywords in domain names. It looks
#   to see if keyword is part of the domain name.
#   Possible improvement could also consider keyword stemming.
# Inputs:
#   $numberOfElements => number of files to process
#   $linksArr => array (reference) to links array
#   $destArr => array (reference) to result array
#   $keyword => keyword to look for in the domain names
# Outputs:
#   No outputs produced
# Returns:
#   Returns the percentage of links whose domain matched
sub keywordDomainExactMatch {
   my ($keyword, $linksArr, $numberOfElements, $destArr) = @_;

   # Nothing to average over; also avoids a division by zero below.
   return 0 unless $numberOfElements && $numberOfElements > 0;
   $keyword = "" unless defined $keyword;

   # Every word of a multi-word keyword must occur in the domain; a
   # single-word keyword is matched as a whole. Empty strings are dropped.
   my @keywordFragments = split(/ /, $keyword);
   my @needles = grep { length $_ }
                 ($#keywordFragments > 0 ? @keywordFragments : ($keyword));

   my $total = 0;

   # "<" rather than "<=": the latter processed one element past the end.
   for (my $i = 0; $i < $numberOfElements; $i++) {
      # keep only the host part: drop the scheme, then everything from the
      # first "/" onwards
      my $tmp = defined $linksArr->[$i] ? $linksArr->[$i] : "";
      $tmp =~ s/^https?:\/\///i;
      my @linkFragments = split(/\//, $tmp);
      my $link = defined $linkFragments[0] ? $linkFragments[0] : "";

      # \Q...\E makes each keyword word match literally
      my $matchCnt = grep { $link =~ /\Q$_\E/i } @needles;

      if (@needles && $matchCnt == scalar(@needles)) {
         $destArr->[$i] = "Y";
         $total++;
      } else {
         $destArr->[$i] = "N";
      }
   }
   return ( ($total / $numberOfElements) * 100 );
}


# Subroutine:
#   keywordDomainPartialMatch
# Description:
#   This subroutine analyzes keywords in domain names. It looks
#   for a partial match between the keyword and the domain name.
# Inputs:
#   $numberOfElements => number of files to process
#   $linksArr => array (reference) to links array
#   $destArr => array (reference) to result array
#   $keyword => keyword to look for in the domain names
# Outputs:
#   No outputs produced
# Returns:
#   Returns the percentage of links whose domain matched
sub keywordDomainPartialMatch {
   my ($keyword, $linksArr, $numberOfElements, $destArr) = @_;

   # Nothing to average over; also avoids a division by zero below.
   return 0 unless $numberOfElements && $numberOfElements > 0;
   $keyword = "" unless defined $keyword;

   # A domain matches when it contains any word of a multi-word keyword, or
   # the whole of a single-word keyword. Hyphenated and space-less spellings
   # of the phrase contain those same words, so they need no separate test.
   my @keywordFragments = split(/ /, $keyword);
   my @needles = grep { length $_ }
                 ($#keywordFragments > 0 ? @keywordFragments : ($keyword));

   my $matchCnt = 0;

   #loop through all links
   for (my $i = 0; $i < $numberOfElements; $i++) {
      # keep only the host part: drop the scheme, then everything from the
      # first "/" onwards
      my $tmp = defined $linksArr->[$i] ? $linksArr->[$i] : "";
      $tmp =~ s/^https?:\/\///i;
      my @linkFragments = split(/\//, $tmp);
      my $link = defined $linkFragments[0] ? $linkFragments[0] : "";

      # Case-insensitive in both the single- and multi-word case (the
      # single-word branch used to be case-sensitive); \Q...\E makes the
      # keyword match literally.
      my $hits = grep { $link =~ /\Q$_\E/i } @needles;

      if ($hits) {
         $destArr->[$i] = "Y";
         $matchCnt++;
      } else {
         $destArr->[$i] = "N";
      }
   }
   return ( ($matchCnt / $numberOfElements) * 100 );
}


# Subroutine:
#   compareTitlePageCopy
# Description:
#   This subroutine compares page title to page copy
# Inputs:
#   $numberOfElements => number of files to process
#   $titlesArr => array (reference) to titles array
#   $destArr => array (reference) to result array
#   $prefix => file prefix for the three SEs
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on passed arrays
sub compareTitlePageCopy {
   my ($numberOfElements, $titlesArr, $destArr, $prefix) = @_;

   #loop through all files ("<" rather than "<=", which processed one
   #element past the end)
   for (my $i = 0; $i < $numberOfElements; $i++) {
      #split up current title into token words
      my $title = cleanText($titlesArr->[$i]);
      $title = "" unless defined $title;
      $title =~ s/["']//g;
      my @titleFragments = split(/ /, $title);

      #get copy of each file (first <body> only)
      my $pageCopy = "";
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p && $p->get_tag("body")) {
         $pageCopy = $p->get_trimmed_text("/body");
         $pageCopy =~ s/["']//g;
      }

      # The helper indexes its third argument as an array reference, so the
      # fragments must be passed as \@titleFragments; a flattened array
      # would shift the remaining arguments out of position.
      compareTitlePageCopyHelper($i, $#titleFragments,
                                 \@titleFragments, $pageCopy, $destArr);
   }
}


# Subroutine:
#   compareTitlePageCopyHelper
# Description:
#   This subroutine is used by compareTitlePageCopy subroutine
#   to compare page title to page copy. It stores the percentage of
#   title words found (case-insensitively) in the page copy.
# Inputs:
#   $index => slot of the result array to fill
#   $numberOfElements => last index of the title fragments array
#   $titleFragments => array (reference) to title fragments array
#   $pageCopy => page copy text
#   $pageCopyTitleArr => array (reference) to resulting array
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on passed arrays

sub compareTitlePageCopyHelper {
   my ($index, $numberOfElements, $titleFragments, $pageCopy, $pageCopyTitleArr) = @_;
   my $foundCnt = 0;
   my $totalTitleFragments = 0;
   my $haystack = lc(defined $pageCopy ? $pageCopy : "");

   for (my $j = 0; $j <= $numberOfElements; $j++) {
      my $fragment = $titleFragments->[$j];

      #empty fragments (produced by consecutive spaces in the title) are
      #not words, so they are left out of both the hits and the total
      next unless defined $fragment && length $fragment;
      $totalTitleFragments++;

      #plain substring test: title words such as "c++" or "(seo" must be
      #matched literally, not interpreted as regular expressions
      $foundCnt++ if index($haystack, lc $fragment) >= 0;
   }

   if ($foundCnt == 0) {
      #also covers an empty title, avoiding a division by zero
      $pageCopyTitleArr->[$index] = 0;
   } else {
      $pageCopyTitleArr->[$index] = ($foundCnt / $totalTitleFragments) * 100;
   }
}


# Subroutine:
#   compareArrays
# Description:
#   This subroutine compares the two arrays element by element: for each
#   index it checks whether the text from the second array occurs
#   (case-insensitively) inside the text from the first array.
# Inputs:
#   $numOfElements => number of elements to compare
#   $realArr => array (reference) to first source array (searched text)
#   $foundArr => array (reference) to second source array (text to find)
#   $destArr => array (reference) to result array ("Y"/"N" per index)
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine returns percentage of found matches (0 when there is
#   nothing to compare)
sub compareArrays {
   my ($numOfElements, $realArr, $foundArr, $destArr) = @_;

   #nothing to compare: avoid dividing by zero below
   return 0 unless $numOfElements && $numOfElements > 0;

   my $found = 0;
   for (my $i = 0; $i < $numOfElements; $i++) {
      my $needle   = defined $foundArr->[$i] ? $foundArr->[$i] : "";
      my $haystack = defined $realArr->[$i]  ? $realArr->[$i]  : "";

      #literal, case-insensitive substring test; no regular expression
      #is built, so characters such as ( ) + $ [ ] need no escaping
      if (index(lc $haystack, lc $needle) >= 0) {
         $destArr->[$i] = "Y";
         $found++;
      } else {
         $destArr->[$i] = "N";
      }
   }
   return ( ($found/$numOfElements)*100 );
}


# Subroutine:
#   getRealTitles
# Description:
#   This subroutine retrieves actual titles from the downloaded pages
#   stored as ./serptemp/<prefix><index>.txt
# Inputs:
#   $numberOfElements => number of files to process
#   $titlesArr => array (reference) to array that will contain real titles
#   $prefix => prefix of file name to be used
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine operates on array already defined outside the routine.
#   Every processed slot receives either the trimmed title or the
#   string "not found". Subroutine returns nothing.
sub getRealTitles {
   my ($numberOfElements, $titlesArr, $prefix) = @_;

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";

      #default covers a missing file, an unreadable file and a page
      #without any <title> tag
      my $title = "not found";

      my $p = (-e $filename) ? HTML::TokeParser->new($filename) : undef;
      if ($p) {
         while (my $token = $p->get_token) {
            next unless $token->[0] eq "S" and lc $token->[1] eq 'title';
            $title = $p->get_text() || "not found";
            #trim surrounding whitespace
            $title =~ s/^\s+//;
            $title =~ s/\s+$//;
            last;
         }
      }
      $titlesArr->[$i] = $title;
   }
}


# Subroutine:
#   getKeywordsTitleMatch
# Description:
#   This subroutine compares given keyword with entries of array
#   while setting third array with results of this comparison.
#   An entry is marked "Y" when at least one word of the keyword
#   phrase occurs (case-insensitively) in it, otherwise "N".
# Inputs:
#   $keyword => keyword or keyphrase to do analysis on
#   $sourceArr => array (reference) to be used for comparisons
#   $numOfElements => highest index of referred array to process
#                     (loop is inclusive)
#   $destArr => array (reference) that will contain comparison results
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine operates on array already defined outside the routine.
#   Subroutine returns nothing.
sub getKeywordsTitleMatch {
   my ($keyword, $sourceArr, $numOfElements, $destArr) = @_;

   $keyword = cleanText($keyword);
   $keyword =~ s/['"]//g;

   #lexical (not global) word list; empty pieces caused by repeated
   #spaces are dropped because they would match every entry
   my @keywordFragments = grep { length } split(/ /, $keyword);

   for (my $i = 0; $i <= $numOfElements; $i++) {
      my $entry = cleanText($sourceArr->[$i]);
      $entry = "" unless defined $entry;
      $entry =~ s/['"]//g;
      $entry = lc $entry;

      my $matched = 0;
      foreach my $fragment (@keywordFragments) {
         #literal substring test, so keyword characters are never
         #interpreted as regular expression syntax
         if (index($entry, lc $fragment) >= 0) {
            $matched = 1;
            last;
         }
      }
      $destArr->[$i] = $matched ? "Y" : "N";
   }
}


# Subroutine:
#   initializeKeyVariables
# Description:
#   Main purpose is to setup link and title arrays that are
#   to be used throughout the script. Downloads one result page from
#   each search engine and extracts the organic links and their titles.
# Inputs:
#   $keyword => keyword or keyphrase to do analysis on; it is placed
#               in the query strings as given (NOTE: presumably already
#               URL-encoded by the caller - confirm)
#   $googleLinksArr => array (reference) containing Google links
#   $googleTitlesArr => array (reference) containing Google titles
#   $yahooLinksArr => array (reference) containing Yahoo! links
#   $yahooTitlesArr => array (reference) containing Yahoo! titles
#   $bingLinksArr => array (reference) containing Bing links
#   $bingTitlesArr => array (reference) containing Bing titles
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine operates on arrays already defined outside the routine.
#   Subroutine returns nothing.
sub initializeKeyVariables {
   my ($keyword,     $googleLinksArr,$googleTitlesArr,
       $yahooLinksArr, $yahooTitlesArr,   $bingLinksArr,
       $bingTitlesArr) = @_;

   my $useragent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)";

   #downloads one SERP (25 second time out) and returns its HTML
   my $fetch = sub {
      my ($url) = @_;
      my $ua = LWP::UserAgent->new;
      $ua->timeout(25);
      $ua->agent($useragent);
      my $res = $ua->request(HTTP::Request->new(GET => $url));
      return $res->content;
   };

   #setup & get $numres results for each SE ($numres is defined
   #outside this subroutine)
   my $ghtml = $fetch->(
"http://www.google.com/search?num=$numres&hl=en&safe=off&q=$keyword&sa=N");
   my $yhtml = $fetch->(
"http://search.yahoo.com/search?p=$keyword&ei=UTF-8&fr=sfp&n=$numres&b=1");
   my $lhtml = $fetch->(
"http://search.bing.com/results.aspx?q=$keyword&first=1&count=$numres&");

   #a plain string would be treated as a FILE NAME by HTML::TokeParser;
   #a scalar reference makes it parse the downloaded document itself
   my $streamGoogle = HTML::TokeParser->new(\$ghtml);
   my $streamYahoo  = HTML::TokeParser->new(\$yhtml);
   my $streamBing   = HTML::TokeParser->new(\$lhtml);

   #collects links whose href is a direct http:// (not https://) URL
   #that does not match the SE-specific reject pattern
   my $collectDirect = sub {
      my ($stream, $rejectRe, $linksArr, $titlesArr) = @_;
      my $count = 0;
      while (my $token = $stream->get_token) {
         next unless $token->[0] eq 'S' && $token->[1] eq 'a';
         my $href = $token->[2]{'href'};
         next unless defined $href && $href =~ /^http/i;
         next if $href =~ /^https/i || $href =~ $rejectRe;

         $linksArr->[$count]  = $href;
         $titlesArr->[$count] = $stream->get_trimmed_text("/a");
         #remove a trailing literal "..." (truncated title marker)
         $titlesArr->[$count] =~ s/\.\.\.$//;
         $count++;
      }
   };

   # process google links
   $collectDirect->($streamGoogle,
                    qr/cache|google|aclk|youtube|wikipedia/i,
                    $googleLinksArr, $googleTitlesArr);

   # process yahoo links: the real target follows "**" in the
   # redirect href and is partially percent-encoded
   my $cnt2 = 0;
   while (my $token = $streamYahoo->get_token) {
      next unless $token->[0] eq 'S' && $token->[1] eq 'a';
      my $href = $token->[2]{'href'};
      next unless defined $href;

      my (undef, $target) = split(/\*\*/, $href);
      next unless defined $target;
      $target =~ s/%3f/?/gi;
      $target =~ s/%26/&/gi;

      next if $target =~ /cache|^https|yahoo|wikipedia|overture/i;

      $target =~ s/%3a/:/gi;
      $target =~ s/^\s+//;
      next if $target eq "";

      $yahooLinksArr->[$cnt2]  = $target;
      $yahooTitlesArr->[$cnt2] = $streamYahoo->get_trimmed_text("/a");
      $yahooTitlesArr->[$cnt2] =~ s/\.\.\.$//;
      $cnt2++;
   }

   # process bing links
   $collectDirect->($streamBing,
                    qr/cache|wikipedia|msn|hotmail|microsoft|bing\.com/i,
                    $bingLinksArr, $bingTitlesArr);
}


# Subroutine:
#   getSERPResults
# Description:
#   This subroutine downloads htmls of all urls specified
#   in the array referenced by $urlArr
# Inputs:
#   $numberOfElements => size of referred array
#   $urlArr => array (reference) containing urls to process
#   $name => prefix of file name to be used
# Outputs:
#   text files ./serptemp/<name><index>.txt containing html from
#   downloaded links
# Returns:
#   Subroutine operates on array already defined outside the routine.
#   Subroutine returns nothing.
sub getSERPResults {
   my ($numberOfElements, $urlArr, $name) = @_;
   my $ua = LWP::UserAgent->new;

   $ua->timeout(25);
   $ua->agent("My Crawler");

   #lexical loop variables: the original used the globals $i and
   #$filename, which could clobber a caller's variables
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = "./serptemp/". $name . $i . ".txt";

      #drop any file left by an earlier run so a failed download is
      #not mistaken for fresh content
      unlink $filename if -e $filename;

      my $url = $urlArr->[$i];
      next unless defined $url && length $url;
      $ua->get($url, ':content_file' => $filename);
   }
}


# Subroutine:
#   cleanText
# Description:
#   This is a utility subroutine to clean HTML fragments: the
#   punctuation characters ( ) [ ] . - = | ! , ? ^ : ; & * $ are
#   replaced by spaces and runs of whitespace are collapsed into a
#   single space.
# Inputs:
#   $text => content of text to clean
# Outputs:
#   No outputs produced
# Returns:
#   The cleaned copy of $text (the argument itself is not modified)
sub cleanText {
   my $text = shift;
   $text = "" unless defined $text;

   #tr/// takes every character literally, so no escaping is needed;
   #"-" is placed last so it is not read as a range
   $text =~ tr/()[].=|!,?^:;&*$-/ /;

   #collapse whitespace runs
   $text =~ s/\s+/ /g;
   return $text;
}

Chapter 5

linkchecker.pl

#!/usr/local/bin/perl
#####################################################################
# File: linkchecker.pl                                              #
# Description: Check Links Script                                   #
# Usage: perl linkchecker.pl http://somedomain.net > report.csv     #
#####################################################################
use WWW::Mechanize;
use LWP::Simple;
# Main script: crawls the site rooted at $baseurl four levels deep,
# collecting every link, then prints a tab-separated report.
my $baseurl = shift;
die "Usage: perl linkchecker.pl http://somedomain.net > report.csv\n"
  unless defined $baseurl && length $baseurl;

# Parallel arrays: index N in each describes the N-th link found.
my @url=();
my @level=();
my @type=();
my @title=();
my @status=();
my @page=();
my %uniqueURL=();
my %checkedURL=();
my $masterCnt=0;
my $masterLevel=1;
$mech  = WWW::Mechanize->new();

#### Processing Level One
$mech->get( $baseurl );

@links = $mech->links();
foreach $link (@links) {
  my $href = $link->url();
  next if ($href =~ /mailto/i) || ($href =~ /javascript/i);

  if ($href !~ /^http/) {
    #relative link: make it absolute and collect unique URL
    $tmpurl = $baseurl . '/' . $href;
    $uniqueURL{$tmpurl}=$link->text();
    $url[$masterCnt]=$tmpurl;
    $type[$masterCnt]= "relative";
  }else {
    $tmpurl = $href;
    $uniqueURL{$href}=$link->text();
    $url[$masterCnt]=$href;
    #\Q..\E: the base URL is literal text, not a regular expression
    if( $href =~ /\Q$baseurl\E/ ){
      $type[$masterCnt]= "absolute internal";
    }else {
      $type[$masterCnt]= "outbound";
    }
  }
  $level[$masterCnt]=$masterLevel;
  $title[$masterCnt]=$link->text();
  $page[$masterCnt]=$baseurl;
  $masterCnt++;
}
$masterLevel++;
$linksOnFirstLevel=$masterCnt;

# The arrays are passed BY REFERENCE below: processSubLevel and
# printReport unpack scalar references, and passing the arrays
# themselves would flatten them into one long argument list.

####Processing Level Two
%levTwoURLs = ();
$masterCnt = processSubLevel(2, $masterCnt, \@url, \@level, \@type,
                       \@title, \@status, \@page, \%uniqueURL,
                      $baseurl,  $masterLevel, \%levTwoURLs);
$masterLevel++;
$linksOnSecondLevel = keys(%levTwoURLs);
####Processing Level Three
%levThreeURLs = ();
$masterCnt = processSubLevel(3, $masterCnt, \@url, \@level,
                           \@type, \@title, \@status, \@page,
                    \%levTwoURLs, $baseurl, $masterLevel,
                    \%levThreeURLs);
$masterLevel++;
$linksOnThirdLevel = keys(%levThreeURLs);
####Processing Level Four
%levFourURLs = ();
$masterCnt = processSubLevel(4, $masterCnt, \@url, \@level, \@type,
                       \@title, \@status, \@page, \%levThreeURLs,
                      $baseurl, $masterLevel, \%levFourURLs);
$linksOnFourthLevel = keys(%levFourURLs);
printReport(\@level,\@page,\@url,\@type,\@title,\@status, $masterCnt);
#### subroutines
# Visits every internal page collected on the previous level and
# appends the links found there to the parallel result arrays.
# Inputs:
#   $currentLevel  => level number (informational; not used here)
#   $mstCnt        => next free index in the result arrays
#   $urlArr, $leArr, $tyArr, $tiArr, $stArr, $paArr
#                  => array references: url, level, type, title,
#                     status (not written here) and parent page
#   $urls          => hash reference of URLs to visit (url => text);
#                     newly discovered URLs are added to it as well
#   $burl          => base URL of the site being checked
#   $mlevel        => level value stored for links found in this pass
#   $uniqueHashRef => hash reference receiving the URLs first seen in
#                     this pass
# Returns:
#   the updated next free index
sub processSubLevel {
  my ($currentLevel, $mstCnt, $urlArr, $leArr, $tyArr, $tiArr,
             $stArr,  $paArr, $urls, $burl, $mlevel,
             $uniqueHashRef) = @_;

  #URLs already recorded before this pass started
  my %urlHash = map { $_ => 1 } @$urlArr;

  #files that are not worth fetching and parsing for links; the dot
  #is escaped so that only real file extensions match
  my $skipExt = qr/\.(?:gif|jpg|png|pdf|doc|xls|asf|mov|avi|xvid|flv|mpg|3gp|mp4|qt|rm|swf|wmv|txt|js|css)$/;

  foreach my $lURL (keys %$urls) {
    next if $lURL =~ $skipExt;
    next unless $lURL =~ /\Q$burl\E/;      #only crawl internal pages
    next if ($lURL =~ /mailto/i) || ($lURL =~ /javascript/i);

    #WWW::Mechanize dies on HTTP errors by default; one broken page
    #must not abort the whole link check
    next unless eval { $mech->get( $lURL ); 1 };

    foreach my $link ($mech->links()) {
      my $href = $link->url();
      my $isRelative = ($href !~ /^http/i);

      #assuming relative link creating absolute form
      my $tmpurl = $isRelative ? $burl . '/' . $href : $href;

      next if exists $urlHash{$tmpurl};
      next if ($href =~ /mailto/i) || ($href =~ /javascript/i);

      #check UNIQUENESS
      if( !(exists $urls->{$tmpurl}) ) {
        $urls->{$tmpurl}=$link->text();
        $uniqueHashRef->{ $tmpurl } = $link->text();
      }

      # check if link relative or absolute
      if ($isRelative) {
        $urlArr->[$mstCnt]= $tmpurl;
        $tyArr->[$mstCnt]= "relative internal";
      }else {
        $urlArr->[$mstCnt]=$href;
        #compare against the base URL that was passed in, not a global
        if( $href =~ /\Q$burl\E/ ){
          $tyArr->[$mstCnt]= "absolute internal";
        }else {
          $tyArr->[$mstCnt]= "outbound";
        }
      }
      $leArr->[$mstCnt]=$mlevel;
      $tiArr->[$mstCnt]=$link->text();
      #parent page = the page on which this link was found
      $paArr->[$mstCnt]=$lURL;
      $mstCnt++;
    }
  }
  return ($mstCnt);
}
# Prints the tab-separated link report to STDOUT, one row per unique
# URL, fetching each URL once more to obtain its HTTP status code.
# Inputs:
#   $levelArr, $pageArr, $urlArr, $typeArr, $titleArr
#              => array references holding the collected link data
#   $statusArr => array reference (not used; the status is fetched here)
#   $mCnt      => number of collected links
sub printReport {
  my ($levelArr, $pageArr, $urlArr, $typeArr, $titleArr,
     $statusArr, $mCnt) = @_;
  my %seen = ();

  print join("\t", "Level", "Parent Page or Location", "Unique URL",
                   "Link Type", "Title", "Status Codes");

  for (my $i = 0; $i < $mCnt; $i++) {
    my $target = $urlArr->[$i];
    next unless defined $target;
    next if $seen{$target}++;                  #report each URL once
    next unless defined $levelArr->[$i] && $levelArr->[$i] ne "";

    #tabs or line breaks inside a title would break the column layout
    my $title = defined $titleArr->[$i] ? $titleArr->[$i] : "";
    $title =~ s/[\t\r\n]+/ /g;

    #getstore returns the HTTP status code of the download
    my $status = getstore($target, "temp");

    print "\n", join("\t", $levelArr->[$i], $pageArr->[$i], $target,
                           $typeArr->[$i], $title, $status);
  }
  print "\n";
}

mymonitor.pl

################################################
# File: mymonitor.pl                           #
# Description: This script takes an argument   #
#              representing a web page URL     #
# Format: perl mymonitor.pl http://www.xyz.com #
################################################
use threads;
use Benchmark;
use Time::HiRes qw(gettimeofday tv_interval);
use LWP::Simple;
use LWP::UserAgent;
use File::Path;
#get page to monitor
my $pageToMonitor = shift;
die "Usage: perl mymonitor.pl http://www.xyz.com\n"
  unless defined $pageToMonitor && length $pageToMonitor;

my $ua = new LWP::UserAgent;

#cleanup temp files (the ./temp directory itself is kept, and created
#when it does not exist yet)
rmtree( './temp', {keep_root => 1} );
mkpath('./temp') unless -d './temp';

# start timer
my $start_time = [ gettimeofday ];
my $res = $ua->get("$pageToMonitor",':content_file' => "./temp/temp.dat");
# stop timer
my $end_time = [ gettimeofday ];
my $elapsedtime = tv_interval($start_time,$end_time);

##### CREATING DATA FILES #####################################
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
    = localtime time;
$year += 1900;
$mon++;

# One record per run, kept on a single line so that every data file
# stays one-record-per-line:
#   day-month-year hour:min:sec;10;15;20;<elapsed seconds>
my $record = "$mday-$mon-$year $hour:$min:$sec;10;15;20;$elapsedtime\n";

# Append the record to today.txt, month.txt, year.txt and historical.txt
foreach my $period (qw(today month year historical)) {
   my $file = "./report/$period/$period.txt";
   my $out;
   unless (open($out, '>>', $file)) {
      #keep going: the other data files may still be writable
      warn "Cannot append to $file: $!\n";
      next;
   }
   print {$out} $record;
   close($out) or warn "Cannot close $file: $!\n";
}

inlinksAnalysis.pl

#!/usr/local/bin/perl
###########################################################
# File: inlinksAnalysis.pl                                #
# Description: This script performs analysis on Yahoo!    #
#              inbound links TSD file                     #
###########################################################
use LWP::Simple;
use LWP::UserAgent;
use HTML::TokeParser;
# Main script: reads referring page URLs from the second column of a
# tab-separated file, downloads each page, counts its links that point
# at $baseurl, and writes the counts (most popular first) to report.txt.
my @URLs = ();
#get the input param name of the file
my $fileToProcess = $ARGV[0];
my $baseurl = $ARGV[1];
die "Usage: perl inlinksAnalysis.pl <tab-separated-file> <base-url>\n"
  unless defined $fileToProcess && defined $baseurl && length $baseurl;

print "\nProcessing: $fileToProcess";
my $cnt = 0;
# open the file
if (-e "$fileToProcess"){
   open(my $in, '<', $fileToProcess) or die $!;
   while (my $line = <$in>) {
      chomp $line;
      my @fragments = split(/\t/, $line);
      my $url = $fragments[1];
      #skip lines that have no URL column
      next unless defined $url && length $url;
      $URLs[$cnt] = $url;
      $cnt++;
   }
   close($in);
} else {
    print "\nfile ($fileToProcess) does not exist";
}

my $ua = new LWP::UserAgent;
$ua->agent("My Crawler");
my %linkPopHash = ();     # link URL => number of times it was seen
my %anchorPopHash = ();   # link URL => anchor text of its last sighting

# "<" (not "<="): $cnt is the number of URLs, so the last index is $cnt-1
for(my $i=0; $i<$cnt; $i++) {
   #remove the previous download so a failed fetch is not parsed again
   unlink "temp.txt";
   $ua->get("$URLs[$i]",':content_file' => "temp.txt");
   next unless -e "temp.txt";

   my $p = HTML::TokeParser->new("temp.txt") or next;
   while (my $token = $p->get_tag("a")) {
      #get link and anchor text
      my $url = $token->[1]{href} || "-";
      my $anchorText = $p->get_trimmed_text("/a");
      $url =~ s/^\s+//;
      $url =~ s/\s+$//;

      #\Q..\E: the base URL is literal text, not a regular expression
      next unless $url =~ /\Q$baseurl\E/i;
      $linkPopHash{$url}++;
      $anchorPopHash{$url} = $anchorText;
   }
}

open(my $report, '>', 'report.txt') or die "Cannot write report.txt: $!";
foreach my $key ( sort { $linkPopHash{$b} <=> $linkPopHash{$a} }
                  keys %linkPopHash ) {
   print {$report} "$key, $linkPopHash{$key}, \"$anchorPopHash{$key}\"\n";
}
close($report);

Chapter 6

searchPhraseReportGoogle.pl

#!/usr/bin/perl
#----------------------------------#
#  PROGRAM:  Search Phrase Report  #
#----------------------------------#

# Main script: scans the web server log files given on the command line
# for requests referred by a Google search, counts each
# (requested resource, search phrase) pair and writes the counts to
# keywordsummary.html, most frequent first.
$numArgs = $#ARGV + 1;

# key: ready-made "<tr><td>resource</td><td>phrase</td>" HTML fragment
# value: number of log lines with that resource/phrase pair
%googleDirCnt = ();

# Log lines are untrusted input that ends up in an HTML page, so the
# characters with special meaning in HTML are escaped.
my $escapeHtml = sub {
   my ($text) = @_;
   $text = "" unless defined $text;
   $text =~ s/&/&amp;/g;
   $text =~ s/</&lt;/g;
   $text =~ s/>/&gt;/g;
   return $text;
};

foreach $argnum (0 .. $#ARGV) {
   print "Processing $ARGV[$argnum] file\n\n";
   open(my $log, '<', $ARGV[$argnum])
      or die("Could not open log file: $ARGV[$argnum].");
   while (my $line = <$log>) {
      #do Google analysis
      next unless ($line =~ /q=/) && ($line =~ /google/);

      my @tmp1 = split ('GET ',$line);
      next unless defined $tmp1[1];
      my @tmp2 = split (' ', $tmp1[1]);     # [0] = requested resource
      my @tmp3 = split ('q=', $tmp1[1]);    # [1] = text after "q="
      next unless defined $tmp3[1];
      my @tmp4 = split ('&', $tmp3[1]);     # [0] = value of q

      #cut at the quote that closes the referrer field BEFORE decoding,
      #so a decoded %22 (quoted phrase search) does not truncate it
      my ($phrase) = split ('"', $tmp4[0]);
      $phrase = "" unless defined $phrase;

      #do some cleanup: "+" means space, %XX is a percent-encoded byte
      $phrase =~ tr/+/ /;
      $phrase =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;

      $tmpKey = "<tr><td>".$escapeHtml->($tmp2[0])." </td><td>"
              . $escapeHtml->($phrase)."</td>";
      $googleDirCnt{$tmpKey}++;
   }
   close($log);
}

open(my $fp, '>', 'keywordsummary.html')
   or die("Could not write keywordsummary.html: $!");
print {$fp} "<html><head><title>Keyword Summary</title></head>";
print {$fp} "<body><strong>Google Summary</strong>";
print {$fp} "<table width=400><tr><td><b>Resource/URL</b></td><td><b>Keyword</b></td>";
print {$fp} "<td><b>Count</b></td></tr>";
foreach $key (sort hashValueDescendingNum (keys(%googleDirCnt))) {
       print {$fp} $key."<td>".$googleDirCnt{$key}."</td></tr>";
}
print {$fp} "</table></body></html>";
close ($fp);


# Sort comparator: orders %googleDirCnt keys by their count, largest
# first. $a and $b are set by sort().
sub hashValueDescendingNum {
   my ($countA, $countB) = ($googleDirCnt{$a}, $googleDirCnt{$b});
   return $countB <=> $countA;
}

Chapter 13

getRankings.pl

#!/usr/local/bin/perl

###########################################
# File: getRankings.pl                    #
# Description: This script queries SEs    #
#              to produce rankings report #
###########################################

### Basic setup part
# Main script: builds the keyword phrase from the command line, saves
# the Google and Bing result pages with wget, then reports at which
# result position(s) the target URL appears.
$numOfArgs = $#ARGV + 1;
$originalkeywordphrase = "";
$targeturl="";

if ($numOfArgs < 2) {
   print ("\n\nUsage: perl getRankings.pl [TargetURL] [Keyword]\n");
   print ("\nOR\n");
   print ("\nUsage: perl getRankings.pl [TargetURL] [Keyword1] [Keyword2] ... [KeywordN]\n\n");
   exit(0);
}

$targeturl=$ARGV[0];

if ( $numOfArgs == 2){
   $originalkeywordphrase = $ARGV[1];
}else {
   foreach $argnum (1 .. $#ARGV) {
      $originalkeywordphrase =  $originalkeywordphrase . " " . $ARGV[$argnum];

      #remove leading & trailing spaces
      $originalkeywordphrase =~ s/^\s+//;
      $originalkeywordphrase =~ s/\s+$//;
   }
}

#percent-encode everything except letters and digits
$keywordphrase= $originalkeywordphrase;
$keywordphrase =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;

# define Source Urls
$listingNo=100;

$gurl=
"http://www.google.com/search?num=$listingNo&hl=en&safe=off&q=$keywordphrase&sa=N";
$burl= "http://www.bing.com/search?q=$keywordphrase&first=1&count=100&";

### get SERP pages part
# Each wget command must stay on ONE line: a line break inside the
# backticks would end the command early and run the options as
# separate (failing) shell commands.
# get google SERP
$gserp = `wget "$gurl" --user-agent="Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)" --output-document="gserp.html" --cookies=off`;
# get Bing SERP
$bserp = `wget "$burl" --user-agent="Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)" --output-document="bserp.html" --cookies=off`;

### analysis part
$googlePositionNumber = getPosition ($targeturl, "google");
$bingSearchPositionNumber = getPosition ($targeturl, "bing");

# report part
##########################
print "\nRanking Summary Report\n";
print   "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print   "Keyword/Phrase: $originalkeywordphrase\n";
print   "Target URL: $targeturl\n";
print   "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print   " Google.....: $googlePositionNumber\n";

if($bingSearchPositionNumber ne "not found"){
   #getPosition may return several comma-separated positions; shift
   #each of them by one instead of treating the list as one number
   $cntAdjusted = join(",", map { $_ + 1 }
                            split(/,/, $bingSearchPositionNumber));
   print   " Bing Search: $cntAdjusted\n";
}else{
   print   " Bing Search: $bingSearchPositionNumber\n";
}
print "\nNote: Check with specific SE to ensure correctness.\n";

##### SUBROUTINES ####################################
# Reads a saved SERP file and returns it as one normalized line:
# line breaks become spaces, and double quotes, <strong>/</strong>
# and <cite>/</cite> tags are removed.
# Inputs:
#   file name of the saved page
# Returns:
#   the normalized content, or "" when the file cannot be read
sub getContent {
   my $filename = shift;

   open(my $in, '<', $filename) or return "";
   #slurp the whole file; "local" restores the input record separator
   #automatically, whatever its previous value was
   my $content = do { local $/; <$in> };
   close($in);
   $content = "" unless defined $content;

   #substitute new line character with space character
   $content =~ s/\n/ /g;
   #substitute quotes with nothing
   $content =~ s/"//g;

   #cleanup bing
   $content =~ s/<\/?strong>//g;
   $content =~ s/<\/?cite>//g;

   return $content;
}

# Finds the result position(s) of a URL in a saved SERP.
# Inputs:
#   target URL (matched literally, case-insensitively)
#   search engine: "google" (reads gserp.html) or "bing" (bserp.html)
# Returns:
#   "not found", or the matching position(s) joined with ","
sub getPosition {
   my ($target, $se) = @_;
   my @tokens = ();
   my $offset = 0;

   #split the page into one chunk per result, using a marker that
   #appears once per result
   if($se eq "google") {
      @tokens = split(/h3 class=r/, getContent("gserp.html"));
   } elsif($se eq "bing") {
      @tokens = split(/sa_cc/, getContent("bserp.html"));
      #NOTE(review): presumably two leading non-result chunks on the
      #Bing page - confirm against a current SERP
      $offset=2;
   }

   print "number of tokens:". $#tokens;

   my @positions = ();
   my $cnt = 0;
   foreach my $token (@tokens) {
      #\Q..\E: dots etc. in the URL are literal characters; the offset
      #is applied to EVERY match, not only to the first one
      if ($token =~ /\Q$target\E/i) {
         push @positions, $cnt - $offset;
      }
      $cnt = $cnt + 1;
   }

   return @positions ? join(",", @positions) : "not found";
}

Chapter 15

sql.txt

-- Queue of tweets entered through add.php and posted later by sendTweet.php.
CREATE TABLE `mytest`.`queue` (
`id` INT( 6 ) NOT NULL AUTO_INCREMENT PRIMARY KEY ,
`message` TEXT NOT NULL ,
-- status: 0 = not sent yet (default), 1 = set by sendTweet.php after posting,
--         2 = set by delete.php when the tweet is removed
`status` INT( 1 ) NOT NULL DEFAULT '0'
) ENGINE = MYISAM ;

config.php

<?php
# Connection settings shared by index.php, add.php, delete.php and
# sendTweet.php.
# The full "<?php" open tag is required: with short_open_tag disabled a
# bare "<?" is not parsed and this file (passwords included) would be
# sent to the browser as plain text.
# change all lines but the last line (Twitter status update link)
#database
$username="your-db-username";
$password="your-db-password";
$database="your-database-name";

#twitter
$tusrid = 'your-twitter-userid';
$tpasswd = 'your-twitter-password';
$tURL = 'http://twitter.com/statuses/update.xml';

?>

index.php

<?php
// Home page: form for queueing a future tweet plus a list of all
// queued tweets that have not been sent or removed (status = 0).
include("config.php");

// mysqli replaces the mysql_* functions, which were removed in PHP 7.
$db = mysqli_connect('localhost', $username, $password, $database)
    or die("Unable to select my database");

// Read all rows up front so the connection can be closed before output.
$rows = array();
$result = mysqli_query($db, "SELECT * FROM queue where status=0 order by id desc");
if ($result) {
    while ($row = mysqli_fetch_assoc($result)) {
        $rows[] = $row;
    }
    mysqli_free_result($result);
}
mysqli_close($db);
?>
<html>

<head>

<title>Home</title>

<script>
// Truncates the field's text to at most limitNum characters.
function limitText(limitField, limitNum) {
    if (limitField.value.length > limitNum) {
        limitField.value = limitField.value.substring(0, limitNum);
    }
}

// Submit guard referenced by the form's onSubmit attribute: allows the
// submit only when the message fits into one tweet.
function checkLength(form) {
    return form.message.value.length <= 140;
}
</script>


</head>

<body>

<h3> What will you be doing? <br>(or what do you want others to think you are doing)
</h3><br>
<form name=mainform method=post action=add.php onSubmit="return checkLength(this)">

<textarea name="message" rows="3" cols="80" onKeyDown="limitText(this,140);"
onKeyUp="limitText(this,140);">
</textarea> <br>
<input type=submit value='Add Future Tweet'>

</form>
<br>

<b>My Future Tweets</b><br><hr>

<table border="1" cellspacing="2" cellpadding="2">
<tr>
<td><b>id</b></td>
<td><b>Tweet</b></td>
<td><b>Status</b></td>
</tr>

<?php foreach ($rows as $row) {
   $id = (int) $row['id'];
   // Stored messages are user input: escape them before printing.
   $message = htmlspecialchars($row['message'], ENT_QUOTES, 'UTF-8');
   $tmp = ($row['status'] < 1) ? "Not Sent" : "";
?>

<tr>
<td nowrap><?php echo $id; ?></td>
<td width=350><?php echo $message; ?>
&nbsp; <a href="delete.php?id=<?php echo $id; ?>">Delete</a>

</td><td nowrap><?php echo $tmp; ?></td>
</tr>
<?php } ?>
</table>
</body>
</html>

add.php

<?php
// Adds the posted message to the tweet queue, then returns to index.php.
include("config.php");

// mysqli replaces the mysql_* functions, which were removed in PHP 7.
$db = mysqli_connect('localhost', $username, $password, $database)
    or die("Unable to select my database");

$message = isset($_POST['message']) ? (string) $_POST['message'] : '';

$added = false;
if ($message !== '') {
    // Prepared statement: the message is user input and must never be
    // pasted into the SQL text (SQL injection).
    $stmt = mysqli_prepare($db, "INSERT INTO queue (message) VALUES (?)");
    if ($stmt) {
        mysqli_stmt_bind_param($stmt, 's', $message);
        $added = mysqli_stmt_execute($stmt);
        mysqli_stmt_close($stmt);
    }
}

mysqli_close($db);

?>

<script>
alert(<?php echo json_encode($added ? 'Tweet Added' : 'Tweet Not Added'); ?>);
window.location.href = "index.php";
</script>

delete.php

<?php
// Marks the tweet given by ?id=N as removed (status 2), then returns
// to index.php.
include("config.php");

// mysqli replaces the mysql_* functions, which were removed in PHP 7.
$db = mysqli_connect('localhost', $username, $password, $database)
    or die("Unable to select my database");

// The id comes from the URL: force it to an integer and bind it as a
// parameter instead of pasting it into the SQL text (SQL injection).
$myid = isset($_GET['id']) ? (int) $_GET['id'] : 0;

$stmt = mysqli_prepare($db, "UPDATE queue SET status=2 WHERE id=?");
if ($stmt) {
    mysqli_stmt_bind_param($stmt, 'i', $myid);
    mysqli_stmt_execute($stmt);
    mysqli_stmt_close($stmt);
}

mysqli_close($db);

?>

<script>
alert('Tweet Removed');
window.location.href = "index.php";
</script>

sendTweet.php

<?php
// Posts the oldest unsent tweet from the queue and marks it as sent.
// Intended to be run from cron.

include("config.php");

// mysqli replaces the mysql_* functions, which were removed in PHP 7.
$db = mysqli_connect('localhost', $username, $password, $database)
    or die("Unable to select my database");

### get the tweet
$result = mysqli_query(
    $db,
    "select id, message from queue where status=0 order by id asc LIMIT 1"
);
$row = $result ? mysqli_fetch_assoc($result) : null;

if (!$row) {
    // Empty queue: do not post an empty status.
    echo 'no tweet queued';
} else {
    ### send the tweet
    $curl_handle = curl_init();
    curl_setopt($curl_handle, CURLOPT_URL, "$tURL");
    curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2);
    curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl_handle, CURLOPT_POST, 1);

    // http_build_query URL-encodes the text, so "&", "=", "+" or "%"
    // inside a tweet cannot corrupt the request body.
    curl_setopt(
        $curl_handle,
        CURLOPT_POSTFIELDS,
        http_build_query(array('status' => $row['message']))
    );
    curl_setopt($curl_handle, CURLOPT_USERPWD, "$tusrid:$tpasswd");

    $response = curl_exec($curl_handle);
    $httpCode = (int) curl_getinfo($curl_handle, CURLINFO_HTTP_CODE);

    curl_close($curl_handle);

    // get the status message: an error page also has a non-empty body,
    // so success additionally requires a 2xx HTTP status.
    if (empty($response) || $httpCode < 200 || $httpCode >= 300) {
        echo 'tweet not delivered';
    } else {
        echo 'tweet delivered';
        ###update db status
        $mid = (int) $row['id'];
        mysqli_query($db, "UPDATE queue SET status = 1 WHERE id = $mid");
    }
}

mysqli_close($db);
?>

Crontab

# Tweet 5 times a day at 7am, 9am, 11am, 1pm and 3pm
# Fields: minute hour day-of-month month day-of-week command.
# The minute must be 0: with "*" the job would run every minute of each listed hour.
0 7,9,11,13,15 * * * php sendTweet.php

Chapter 18

The following code listings represent only the main listings. For the full source code, please visit book.seowarrior.com.

index.html

<html>
<head>

<title>SEO Warrior: Keyword Dashboard (Alpha)</title>
<link rel="stylesheet" type="text/css" href="pagestyle.css" />
<script src="functions.js" type="text/javascript"></script>
<script src="dockablewindow.js" type="text/javascript"></script>
</head>

<body>
<!-- Page header: title on the left, status and contact links on the right -->
<table width=100% cellpadding=0 cellspacing=0 border=0>
<tr>
<td valign=top align=left><h1 style='color:blue'>SEO Warrior: Keyword Dashboard (Alpha) </h1>
</td>
<td valign=top align=right>
<img border=0 src="http://www.seowarrior.net/images/status.png" title="SEO Warrior: Keyword Dashboard Status">

<a href="http://www.seowarrior.net/contact/" title="Report Bugs">
<font size=2>Report Bugs</font></a> |
<a href="http://www.seowarrior.net/contact/" title="Make a Suggestion">
<font size=2>Suggestion</font></a>
 <a href="http://www.seowarrior.net"><img border=0 src="http://www.seowarrior.net/images/seowarriormini.png" title="SEO Warrior: Keyword Dashboard"></a>
</td>
</tr>
</table>

<!-- Search form: each button calls stepOne() for one search engine -->
<div id="formdiv">
  <form name="mainform" onSubmit="return false;">
  Keyword: <input type="text" id="keyword" name="keyword" size="20">
  <input type="button" id="phaseGoogleBtn" name="phaseGoogleBtn" value="Google" onclick="stepOne('google')">
  <input type="button" id="phaseBingBtn" name="phaseBingBtn" value="Bing" onclick="stepOne('bing')">
  <input type="button" id="phaseYahooBtn" name="phaseYahooBtn" value="Yahoo!" onclick="stepOne('yahoo')">

    <input type="radio" name="resultLimit" value="10" checked >10
    <input type="radio" name="resultLimit" value="20">20
    <input type="radio" name="resultLimit" value="50">50
    [Results]
  </form>
</div>

<!-- One result frame per search engine -->
<iframe onLoad="resizeG()" name="responsedivgoogle" id="responsedivgoogle" scrolling="no"></iframe>
<iframe onLoad="resizeY()" name="responsedivyahoo" id="responsedivyahoo" scrolling="no"></iframe>
<iframe onLoad="resizeB()" name="responsedivbing" id="responsedivbing" scrolling="no"></iframe>

<!-- Details frame: link target of the result lists -->
<iframe name="detailsframe" id="detailsframe" class="dockclass"></iframe>

<script type="text/javascript">
var dock0=new dockit("detailsframe", 0);
</script>

</body>

</html>

bParser.php

<html>
<head>


<style>
/* "text-decoration: bold" and "text-decoration: normal" are not valid
   CSS values and were ignored by browsers, so those declarations have
   been dropped without changing how the page renders. */
body {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #f3f3f3;
}

a:hover {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #989898;
}

a:visited, a:link, a:active {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  color : #000022;
}
</style>


</head>

<body>

<b>Bing</b>
<br>Keyword: <?= htmlspecialchars(isset($_GET["keyword"]) ? $_GET["keyword"] : '', ENT_QUOTES, 'UTF-8') ?>
<br>Showing <?= htmlspecialchars(isset($_GET["resultLimit"]) ? $_GET["resultLimit"] : '', ENT_QUOTES, 'UTF-8') ?> results
<br><hr>
<?php

/**
 * Returns the host part of an absolute URL (the text between "://" and the
 * next "/"), without the scheme.
 *
 * Uses explode() because split() was removed in PHP 7.
 *
 * @param string $url Absolute URL such as "http://www.example.com/page".
 * @return string Host (including any port), or "" when $url has no "://".
 */
function getBaseURL($url){
   $schemeAndRest = explode("://", (string) $url, 2);
   if (count($schemeAndRest) < 2) {
      // No scheme separator: there is no host to extract.
      return "";
   }
   $hostAndPath = explode("/", $schemeAndRest[1], 2);
   return $hostAndPath[0];
}


/**
 * Downloads one Bing result page for a keyword.
 *
 * @param string $mykeyword Search phrase as typed by the user.
 * @param int    $myindex   Value sent as Bing's "first" query parameter.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function getBingSERP($mykeyword, $myindex){
    // ereg_replace() was removed in PHP 7. Every whitespace character is first
    // normalised to a space so urlencode() turns it into "+", as before, while
    // also encoding characters such as "&", "#" and "+" that used to be sent raw.
    $query = urlencode(preg_replace('/[[:space:]]/', ' ', (string) $mykeyword));

    $url = "http://www.bing.com/search?q=".$query."&first=".(int) $myindex."&";

    // The visitor's own user agent is forwarded; it is absent for some clients.
    $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // NOTE(review): "c:cookie.txt" is a drive-relative Windows path - confirm it
    // is the intended cookie file on the deployment host.
    curl_setopt($ch, CURLOPT_COOKIEFILE, "c:cookie.txt");
    curl_setopt($ch, CURLOPT_USERAGENT, $client);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}


/**
 * Wraps every keyword word found in a host name in <B> tags and HTML-escapes
 * everything else, so the result can be echoed directly.
 *
 * Matching is done in a single pass, so the inserted <B> markup can never be
 * matched by a later word. Matching is case-sensitive.
 *
 * @param string $host    Raw host name.
 * @param string $keyword Space-separated search phrase.
 * @return string HTML-safe host with matches emphasised.
 */
function highlightKeywordChunks($host, $keyword) {
   $flags = ENT_QUOTES | ENT_SUBSTITUTE;
   $alternatives = array();
   foreach (explode(" ", (string) $keyword) as $chunk) {
      if ($chunk !== "") {
         $alternatives[] = preg_quote($chunk, '/');
      }
   }
   if (count($alternatives) === 0) {
      return htmlspecialchars($host, $flags, 'UTF-8');
   }

   // With one capture group, PREG_SPLIT_DELIM_CAPTURE yields plain text at even
   // indexes and the matched keyword words at odd indexes.
   $pieces = preg_split('/(' . implode('|', $alternatives) . ')/', $host, -1, PREG_SPLIT_DELIM_CAPTURE);
   if ($pieces === false) {
      return htmlspecialchars($host, $flags, 'UTF-8');
   }

   $html = "";
   foreach ($pieces as $index => $piece) {
      $escaped = htmlspecialchars($piece, $flags, 'UTF-8');
      $html .= ($index % 2 === 1) ? '<B>' . $escaped . '</B>' : $escaped;
   }
   return $html;
}

/**
 * Prints one link per new external host found in a Bing result page.
 *
 * Each link targets kw.php in the "detailsframe" frame and shows the host with
 * the words of $_GET["keyword"] emphasised. Links to live.com, msn.c* and
 * microsoft.com, non-http/ftp links, and hosts already seen in this page are
 * skipped.
 *
 * @param string|false $serp      HTML of the result page (false/"" prints nothing).
 * @param int          $masterCnt Number of links printed so far.
 * @param int          $rowLimit  Maximum number of links to print in total.
 * @return int Updated number of links printed.
 */
function processSERP($serp, $masterCnt, $rowLimit) {
   // A failed download yields false; loadHTML() rejects empty input.
   if (!is_string($serp) || $serp === "") {
      return $masterCnt;
   }
   $keyword = (isset($_GET["keyword"]) && is_string($_GET["keyword"])) ? $_GET["keyword"] : "";

   $dom = new DOMDocument();
   // "@" keeps libxml's warnings about malformed markup out of the output.
   @$dom->loadHTML($serp);
   $xpath = new DOMXPath($dom);
   $hrefs = $xpath->evaluate("/html/body//a");

   // Exact-match set of hosts already encountered; the former substring search
   // over a concatenated string treated "example.com" as seen after "myexample.com".
   $seenHosts = array();

   for ($i = 0; $i < $hrefs->length && $masterCnt < $rowLimit; $i++) {
      $href = $hrefs->item($i);
      $url = $href->getAttribute('href');
      $baseurl = getBaseURL($url);
      $anchortext = $href->nodeValue;

      $isBingOwned = preg_match('/live\.com/i', $url)
                  || preg_match('/msn\.c/i', $url)
                  || preg_match('/microsoft\.com/i', $url);
      $isExternalLink = preg_match('/^http/i', $url) || preg_match('/^ftp/i', $url);

      if (!$isBingOwned && $isExternalLink && !isset($seenHosts[$baseurl])) {
         // URL and keyword are query-encoded, attribute text is HTML-escaped.
         echo "<a target=detailsframe href='kw.php?url=" . urlencode($url)
            . "&amp;keyword=" . urlencode($keyword)
            . "' title='" . htmlspecialchars($anchortext, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . "'>" . highlightKeywordChunks($baseurl, $keyword) . "</a><br>";
         $masterCnt++;
      }

      // Every host is remembered, including those that were filtered out.
      $seenHosts[$baseurl] = true;
   }
   return $masterCnt;
}

// Driver: fetch up to five consecutive Bing result pages (first=1, 11, 21, 31,
// 41) until $rowLimit links have been printed, pausing 1-3 seconds between
// requests and flushing after each page.
//
// This replaces five copy-pasted steps, two of which were broken: the second
// used an undefined $first and never fetched, so page 1 was printed twice; the
// last stored the new offset in $nextRes and refetched the previous page.
$rowLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 0;
$keyword = (isset($_GET["keyword"]) && is_string($_GET["keyword"])) ? $_GET["keyword"] : "";

$masterCnt = 0;
$maxPages = 5;

for ($page = 0; $page < $maxPages && $masterCnt < $rowLimit; $page++) {
   if ($page > 0) {
      // Random pause between requests.
      sleep(rand(1, 3));
   }
   $next = 1 + (10 * $page);
   $serpRes = getBingSERP($keyword, $next);
   $masterCnt = processSERP($serpRes, $masterCnt, $rowLimit);
   flush();
}

?>


</body>
</html>

gParser.php

<html>

<head>

<style>
/* Base look of the result page: small Helvetica text on a light grey background. */
body {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  /* NOTE(review): `bold` is a font-weight keyword, not a text-decoration value,
     so this declaration presumably has no effect - confirm the intent. */
  text-decoration : bold;
  background : #f3f3f3;
}

/* Hovered links get a darker grey background. */
a:hover {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #989898;
  /* NOTE(review): same invalid `bold` value as in the body rule. */
  text-decoration : bold;
}

/* Every other link state: near-black text in the same font as the body. */
a:visited, a:link, a:active {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  color : #000022;
  /* NOTE(review): `normal` is not a text-decoration value; `none` may have been
     meant, but switching to it would change how links render - confirm first. */
  text-decoration : normal;
}
</style>

</head>
<body>

<b>Google</b>
<?php
// Both values come straight from the query string, so they are escaped / cast
// before being echoed back; printing them raw allows reflected XSS.
$shownKeyword = (isset($_GET["keyword"]) && is_string($_GET["keyword"])) ? $_GET["keyword"] : "";
$shownLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 0;
?>
<br>Keyword: <?php echo htmlspecialchars($shownKeyword, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>
<br>Showing <?php echo $shownLimit; ?> results
<br><hr>
<?php
/**
 * Returns the host part of an absolute URL (the text between "://" and the
 * next "/"), without the scheme.
 *
 * Uses explode() because split() was removed in PHP 7.
 *
 * @param string $url Absolute URL such as "http://www.example.com/page".
 * @return string Host (including any port), or "" when $url has no "://".
 */
function getBaseURL($url){
   $schemeAndRest = explode("://", (string) $url, 2);
   if (count($schemeAndRest) < 2) {
      // No scheme separator: there is no host to extract.
      return "";
   }
   $hostAndPath = explode("/", $schemeAndRest[1], 2);
   return $hostAndPath[0];
}

/**
 * Downloads a Google result page (num=50) for a keyword.
 *
 * @param string $mykeyword Search phrase as typed by the user.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function getGoogleSERP($mykeyword){
    // ereg_replace() was removed in PHP 7. Every whitespace character is first
    // normalised to a space so urlencode() turns it into "+", as before, while
    // also encoding characters such as "&", "#" and "+" that used to be sent raw.
    $query = urlencode(preg_replace('/[[:space:]]/', ' ', (string) $mykeyword));

    // The stray "." that used to be appended to the search phrase is dropped.
    $url = "http://www.google.com/search?q=".$query."&num=50&";

    // The visitor's own user agent is forwarded; it is absent for some clients.
    $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $client);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}


/**
 * Wraps every keyword word found in a host name in <B> tags and HTML-escapes
 * everything else, so the result can be echoed directly.
 *
 * Matching is done in a single pass, so the inserted <B> markup can never be
 * matched by a later word. Matching is case-sensitive.
 *
 * @param string $host    Raw host name.
 * @param string $keyword Space-separated search phrase.
 * @return string HTML-safe host with matches emphasised.
 */
function highlightKeywordChunks($host, $keyword) {
   $flags = ENT_QUOTES | ENT_SUBSTITUTE;
   $alternatives = array();
   foreach (explode(" ", (string) $keyword) as $chunk) {
      if ($chunk !== "") {
         $alternatives[] = preg_quote($chunk, '/');
      }
   }
   if (count($alternatives) === 0) {
      return htmlspecialchars($host, $flags, 'UTF-8');
   }

   // With one capture group, PREG_SPLIT_DELIM_CAPTURE yields plain text at even
   // indexes and the matched keyword words at odd indexes.
   $pieces = preg_split('/(' . implode('|', $alternatives) . ')/', $host, -1, PREG_SPLIT_DELIM_CAPTURE);
   if ($pieces === false) {
      return htmlspecialchars($host, $flags, 'UTF-8');
   }

   $html = "";
   foreach ($pieces as $index => $piece) {
      $escaped = htmlspecialchars($piece, $flags, 'UTF-8');
      $html .= ($index % 2 === 1) ? '<B>' . $escaped . '</B>' : $escaped;
   }
   return $html;
}

// Driver: download the Google result page for the requested keyword and print
// one link per new external host, up to $rowLimit links. Links to google.com
// and youtube.com, root-relative links, cache links, non-http/ftp links and
// hosts that were already seen are skipped.
$rowLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 0;
$keyword = (isset($_GET["keyword"]) && is_string($_GET["keyword"])) ? $_GET["keyword"] : "";
$serp = getGoogleSERP($keyword);

$intCnt = 0;

// A failed download yields false; loadHTML() rejects empty input.
if (is_string($serp) && $serp !== "") {
   $dom = new DOMDocument();
   // "@" keeps libxml's warnings about malformed markup out of the output.
   @$dom->loadHTML($serp);
   $xpath = new DOMXPath($dom);
   $hrefs = $xpath->evaluate("/html/body//a");

   // Exact-match set of hosts already encountered; the former substring search
   // over a concatenated string treated "example.com" as seen after "myexample.com".
   $seenHosts = array();

   for ($i = 0; $i < $hrefs->length && $intCnt < $rowLimit; $i++) {
      $href = $hrefs->item($i);
      $url = $href->getAttribute('href');
      $baseurl = getBaseURL($url);
      $anchortext = $href->nodeValue;

      // '/^\//' replaces the former "/^//i", which was not a valid pattern.
      $isGoogleOwned = preg_match('/google\.com/i', $url)
                    || preg_match('/youtube\.com/i', $url)
                    || preg_match('/^\//', $url)
                    || preg_match('/cache:/i', $url);
      $isExternalLink = preg_match('/^http/i', $url) || preg_match('/^ftp/i', $url);

      if (!$isGoogleOwned && $isExternalLink && !isset($seenHosts[$baseurl])) {
         // URL and keyword are query-encoded, attribute text is HTML-escaped.
         echo "<a target=detailsframe href='kw.php?url=" . urlencode($url)
            . "&amp;keyword=" . urlencode($keyword)
            . "' title='" . htmlspecialchars($anchortext, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . "'>" . highlightKeywordChunks($baseurl, $keyword) . "</a><br>";
         $intCnt++;
      }

      // Every host is remembered, including those that were filtered out.
      $seenHosts[$baseurl] = true;
   }
}

?>

</body>
</html>

yParser.php

<html>
<head>


<style>
/* Base look of the result page: small Helvetica text on a light grey background. */
body {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  /* NOTE(review): `bold` is a font-weight keyword, not a text-decoration value,
     so this declaration presumably has no effect - confirm the intent. */
  text-decoration : bold;
  background : #f3f3f3;
}

/* Hovered links get a darker grey background. */
a:hover {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #989898;
  /* NOTE(review): same invalid `bold` value as in the body rule. */
  text-decoration : bold;
}

/* Every other link state: near-black text in the same font as the body. */
a:visited, a:link, a:active {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  color : #000022;
  /* NOTE(review): `normal` is not a text-decoration value; `none` may have been
     meant, but switching to it would change how links render - confirm first. */
  text-decoration : normal;
}
</style>


</head>

<body>

<b>Yahoo!</b>
<?php
// Both values come straight from the query string, so they are escaped / cast
// before being echoed back; printing them raw allows reflected XSS.
$shownKeyword = (isset($_GET["keyword"]) && is_string($_GET["keyword"])) ? $_GET["keyword"] : "";
$shownLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 0;
?>
<br>Keyword: <?php echo htmlspecialchars($shownKeyword, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>
<br>Showing <?php echo $shownLimit; ?> results
<br><hr>
<?php

/**
 * Returns the host part of an absolute URL (the text between "://" and the
 * next "/"), without the scheme.
 *
 * Uses explode() because split() was removed in PHP 7.
 *
 * @param string $url Absolute URL such as "http://www.example.com/page".
 * @return string Host (including any port), or "" when $url has no "://".
 */
function getBaseURL($url){
   $schemeAndRest = explode("://", (string) $url, 2);
   if (count($schemeAndRest) < 2) {
      // No scheme separator: there is no host to extract.
      return "";
   }
   $hostAndPath = explode("/", $schemeAndRest[1], 2);
   return $hostAndPath[0];
}


/**
 * Downloads a Yahoo! result page (n=100) for a keyword.
 *
 * @param string $mykeyword Search phrase as typed by the user.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function getYahooSERP($mykeyword){
    // ereg_replace() was removed in PHP 7. Every whitespace character is first
    // normalised to a space so urlencode() turns it into "+", as before, while
    // also encoding characters such as "&", "#" and "+" that used to be sent raw.
    $query = urlencode(preg_replace('/[[:space:]]/', ' ', (string) $mykeyword));

    // The stray "." that used to be appended to the search phrase is dropped.
    $url = "http://search.yahoo.com/search;_ylt=?p=".$query."&n=100&";

    // The visitor's own user agent is forwarded; it is absent for some clients.
    $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_REFERER, "http://search.yahoo.com/");
    curl_setopt($ch, CURLOPT_USERAGENT, $client);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}

/**
 * Wraps every keyword word found in a host name in <B> tags and HTML-escapes
 * everything else, so the result can be echoed directly.
 *
 * Matching is done in a single pass, so the inserted <B> markup can never be
 * matched by a later word. Matching is case-sensitive.
 *
 * @param string $host    Raw host name.
 * @param string $keyword Space-separated search phrase.
 * @return string HTML-safe host with matches emphasised.
 */
function highlightKeywordChunks($host, $keyword) {
   $flags = ENT_QUOTES | ENT_SUBSTITUTE;
   $alternatives = array();
   foreach (explode(" ", (string) $keyword) as $chunk) {
      if ($chunk !== "") {
         $alternatives[] = preg_quote($chunk, '/');
      }
   }
   if (count($alternatives) === 0) {
      return htmlspecialchars($host, $flags, 'UTF-8');
   }

   // With one capture group, PREG_SPLIT_DELIM_CAPTURE yields plain text at even
   // indexes and the matched keyword words at odd indexes.
   $pieces = preg_split('/(' . implode('|', $alternatives) . ')/', $host, -1, PREG_SPLIT_DELIM_CAPTURE);
   if ($pieces === false) {
      return htmlspecialchars($host, $flags, 'UTF-8');
   }

   $html = "";
   foreach ($pieces as $index => $piece) {
      $escaped = htmlspecialchars($piece, $flags, 'UTF-8');
      $html .= ($index % 2 === 1) ? '<B>' . $escaped . '</B>' : $escaped;
   }
   return $html;
}

// Driver: download the Yahoo! result page for the requested keyword and print
// one link per new external host, up to $rowLimit links. Only links whose href
// contains "**" are considered; the URL-encoded text after "**" is taken as the
// real target. Targets on yahoo.com, cache links, non-http/ftp links and hosts
// that were already seen are skipped.
$rowLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 0;
$keyword = (isset($_GET["keyword"]) && is_string($_GET["keyword"])) ? $_GET["keyword"] : "";
$serp = getYahooSERP($keyword);

$intCnt = 0;

// A failed download yields false; loadHTML() rejects empty input.
if (is_string($serp) && $serp !== "") {
   $dom = new DOMDocument();
   // "@" keeps libxml's warnings about malformed markup out of the output.
   @$dom->loadHTML($serp);
   $xpath = new DOMXPath($dom);
   $hrefs = $xpath->evaluate("/html/body//a");

   // Exact-match set of hosts already encountered; the former substring search
   // over a concatenated string treated "example.com" as seen after "myexample.com".
   $seenHosts = array();

   for ($i = 0; $i < $hrefs->length && $intCnt < $rowLimit; $i++) {
      $href = $hrefs->item($i);
      $url = $href->getAttribute('href');

      // A literal split on "**" replaces split('**') and preg_match("/**/i"),
      // neither of which was a valid regular expression.
      $redirectParts = explode('**', $url, 2);
      if (count($redirectParts) < 2) {
         continue;
      }
      $tmpurl = urldecode($redirectParts[1]);
      $baseurl = getBaseURL($tmpurl);
      $anchortext = $href->nodeValue;

      // The yahoo.com test runs on the raw host; it used to run after the <B>
      // highlighting and so missed hosts containing a keyword word.
      $isYahooOwned = preg_match('/yahoo\.com/i', $baseurl) || preg_match('/cache/i', $url);
      $isExternalLink = preg_match('/^http/i', $url) || preg_match('/^ftp/i', $url);

      if (!$isYahooOwned && $isExternalLink && !isset($seenHosts[$baseurl])) {
         // URL and keyword are query-encoded, attribute text is HTML-escaped.
         echo "<a target=detailsframe href='kw.php?url=" . urlencode($tmpurl)
            . "&amp;keyword=" . urlencode($keyword)
            . "' title='" . htmlspecialchars($anchortext, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . "'>" . highlightKeywordChunks($baseurl, $keyword) . "</a><br>";
         $intCnt++;
      }

      // Every host is remembered, including those that were filtered out.
      $seenHosts[$baseurl] = true;
   }
}

?>
</body>
</html>

..................Content has been hidden....................

You can't read all pages of this ebook. Please click here to log in and view all pages.

Table of Contents for A. Script Listings

Create new playlist

Sign In

Sign Up