Appendix A. Script Listings

Chapter 2

Please note that some of these scripts send automated queries to search engines. Exercise extra caution before running those scripts in particular.

spiderviewer.php

<html>

<head>
<title>Search Engine Web Page Viewer</title>
</head>

<body>

<!-- Submits the URL back to this same page via GET (empty action). -->
<form name="mainform" action="" method="get">
<table border="0" width="100%" align="center">
    <tr>
        <td>Enter URL: <br>
        <input type="text" name="url" size="20"></td>
    </tr>
    <tr>
        <td>
        <input type="submit" value="Click to See Search Engine View" name="submit">
        </td>
    </tr>
</table>
</form>
<hr>

<?php

// Only read the parameter when it was actually submitted: indexing
// $_GET['url'] on the first page load raises an "undefined index" notice.
// A non-string value (e.g. ?url[]=x) is treated as "nothing submitted".
$myurl = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : null;

if (isset($myurl)) {
   print spiderViewer($myurl);
}

?>

</body>
</html>

<?php

/**
 * Builds an HTML fragment showing the text of a page after markup,
 * entities, stop words and unusual characters have been stripped.
 *
 * @param string $url Address of the page to fetch (passed to get_content()).
 * @return string HTML table with the stripped text, or a plain error
 *                message when the URL is empty or no content came back.
 */
function spiderViewer($url) {
    $finalHTML='';
    if($url) {
      $originalHTML=get_content($url);
      if($originalHTML) {
        $finalHTML.='<table border="0" align="center" width="75%">';
        $finalHTML.='<tr><td align="center" valign="top">';
        // The URL comes straight from the query string: escape it before
        // echoing it back into the page.
        $finalHTML.='<b>Search Engine View for URL:'
                  . htmlspecialchars($url, ENT_QUOTES) . '</b></td></tr>';
        $finalHTML.='<tr><td align="left" valign="top">';

        // Drop whole elements whose content is never visible text.
        // '#' is used as the delimiter because the patterns contain '/'.
        foreach (array('script', 'object', 'applet', 'style') as $tag) {
            $originalHTML=preg_replace(
                '#<' . $tag . '.*?>.*?</' . $tag . '.*?>#sim',
                '', $originalHTML);
        }
        // Remaining tags, then character entities.
        $originalHTML=preg_replace('/<.*?>/sim','',$originalHTML);
        $originalHTML=preg_replace('/&[#]{0,1}.[^ ]*;/sim',' ',$originalHTML);

        // Stop words: accept either "<br />" or line breaks as separators
        // in stopwords.txt, and tolerate a missing file.
        $stopWordsArray=array();
        if (is_readable('stopwords.txt')) {
            $stopWordsArray=preg_split('#(?:<br\s*/?>|[\r\n])+#i',
                file_get_contents('stopwords.txt'), -1, PREG_SPLIT_NO_EMPTY);
        }
        foreach ($stopWordsArray as $stopWord) {
            $stopWord=trim($stopWord);
            if ($stopWord==='') {
                continue;
            }
            // \W on both sides matches the word only when it stands alone;
            // preg_quote keeps punctuation in a stop word from being read
            // as regex syntax. Replacing with a space (not '') keeps the
            // neighbouring words apart.
            $originalHTML=preg_replace(
                '/\W' . preg_quote($stopWord, '/') . '\W/sim',
                ' ', $originalHTML);
        }

        // Keep only letters, digits, basic punctuation, spaces and line
        // breaks; '-' is escaped so it is not read as a range operator.
        $originalHTML=preg_replace('/[^A-Z0-9a-z.?!;,\-\r\n ]*/sim','',
            $originalHTML);
        // Collapse runs of whitespace into a single space.
        $originalHTML=preg_replace('/[\r\n ]{2,1000}/sim',' ',$originalHTML);
        $finalHTML.= $originalHTML . '</td></tr></table>';
      } else {
        $finalHTML='Please check your URL.';
      }
    } else {
      $finalHTML='The url you entered was invalid.';
    }
    return $finalHTML;
}

/**
 * Fetches a URL with cURL and returns the response body.
 *
 * Redirects are followed, HTTP error statuses still return their body
 * (FAILONERROR is off), and the request times out after 30 seconds.
 *
 * @param string $url Address to fetch.
 * @return string Response body, or '' when the transfer failed.
 */
function get_content($url)
{
   $ch = curl_init();
   if ($ch === false) {
      return '';
   }
   curl_setopt($ch, CURLOPT_URL, $url);
   curl_setopt($ch, CURLOPT_HEADER, 0);
   curl_setopt($ch, CURLOPT_FAILONERROR, 0);
   curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
   // The URL is user input: allow web protocols only, so schemes such as
   // file:// cannot be used to read local files, directly or via redirect.
   curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
   curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
   // Must be a single line: a line break inside a header value corrupts
   // the request.
   curl_setopt($ch, CURLOPT_USERAGENT,
      'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)');
   curl_setopt($ch, CURLOPT_TIMEOUT, 30);
   // Return the body from curl_exec() instead of capturing printed output.
   curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
   if (preg_match('#^https://#i', $url)) {
      // NOTE(review): certificate checks are disabled for https URLs, as in
      // the original listing; this permits man-in-the-middle interception.
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
      curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
   }
   $string = curl_exec($ch);
   curl_close($ch);
   return ($string === false) ? '' : $string;
}

Chapter 3

layout1.html

<html>
<head>
<style>
/* Both boxes are 800px wide and horizontally centered: left:50% places the
   box's left edge at the page midpoint, and margin-left:-400px (ASCII
   hyphen-minus) pulls it back by half its width. */
#navigation {
position: absolute;
top: 10px;
left: 50%;
width: 800px;
margin-left: -400px;
text-align: left;
}

#content {
position: absolute;
top: 150px;
left: 50%;
width: 800px;
margin-left: -400px;
text-align: left;
}

body {
    text-align: center;
    min-width: 600px;
}
</style>
</head>
<body>

<!-- The content div comes first in the markup even though the CSS positions
     the navigation div above it on screen (top: 10px vs. top: 150px). -->
<div id="content">content<!-- SEO optimized content text goes here.--></div>

<div id="navigation">navigation<!-- navigational elements, ads go here--></div>

</body>
</html>

layout2.html

<html>
<head>

<style>
/* Two columns side by side. After the -400px margin the navigation column
   starts at x=0 (200px wide) and the content column at x=200 (600px wide).
   Non-zero lengths need a unit, and the minus sign must be the ASCII
   hyphen-minus, otherwise the declaration is ignored. */
#navigation {
position: absolute;
top: 0px;
left: 400px;
width: 200px;
margin-left: -400px;
text-align: left;
}

#content {
position: absolute;
top: 0px;
left: 600px;
width: 600px;
margin-left: -400px;
text-align: left;
}

body {
    text-align: center;
    min-width: 800px;
}
</style>
</head>
<body>

<!-- Content precedes navigation in the markup; the CSS places navigation
     to its left on screen. -->
<div id="content">
SEO optimized content text goes here.</div>

<div id="navigation">navigational elements, ads go here</div>

</body>
</html>

layout3.html

<html>
<head>
<style>

/* All panels are positioned relative to the page midpoint (left: 50%) and
   shifted with margin-left. Negative margins must use the ASCII
   hyphen-minus, otherwise the declaration is ignored. */
#top {
position: absolute;
top: 10px;
left: 50%;
width: 800px;
margin-left: -400px;
text-align: left;
}

#left {
position: absolute;
top: 150px;
left: 50%;
width: 200px;
margin-left: -400px;
text-align: left;
}

/* NOTE(review): #main spans -200px..+400px from the midpoint and #right
   spans 0..+200px, so the two boxes overlap - confirm the intended widths. */
#main {
position: absolute;
top: 150px;
left: 50%;
width: 600px;
margin-left: -200px;
text-align: left;
}

#right {
position: absolute;
top: 150px;
left: 50%;
width: 200px;
margin-left: 0px;
text-align: left;
}

body {
    text-align: center;
    min-width: 800px;
}

</style>
</head>

<body>

<!-- The main body comes first in the markup; the CSS alone decides where
     each panel appears on screen. -->
<div id="main">optimized main body</div>

<div id="left">left panel</div>

<div id="top">top panel</div>

<div id="right">right panel</div>

</body>

</html>

Chapter 4

rankingfactors.pl

#!/usr/local/bin/perl
###########################################################
# File: rankingfactors.pl                                 #
# Description: This script performs analysis on several   #
#              ranking factors including:                 #
#  1) Keywords in Page Titles                             #
#  2) Keywords in Domain Names                            #
#  3) Keywords in Page Copy                               #
#  4) Keywords in Headings                                #
#  5) Keywords in Meta description                        #
#  6) Keyword Proximity                                   #
#  7) Keywords in Outbound Links                          #
#  8) Page Size                                           #
#  9) Words per Page                                      #
# 10) Website Size                                        #
# and more...                                             #
#                                                         #
# Format: perl rankingfactors.pl 10|100 keyword(s)        #
###########################################################

use LWP::Simple;
use LWP::UserAgent;
use HTML::TokeParser;
use HTML::TreeBuilder;
use File::Path;
use Math::Round qw(:all);

# Command line: <number of results> <keyword> [<keyword> ...]
die "Usage: perl rankingfactors.pl 10|100 keyword(s)\n" if @ARGV < 2;

my $keyphrase = "";

# Links and titles as listed on each engine's result pages.
my @googleLinks  = ( );
my @googleTitles = ( );
my @yahooLinks   = ( );
my @yahooTitles  = ( );
my @bingLinks    = ( );
my @bingTitles   = ( );

#build keyphrase/keyword from every argument after the result count
foreach my $argnum (1 .. $#ARGV) {
   $keyphrase = $keyphrase . "$ARGV[$argnum] ";
}
my $numres = $ARGV[0];
# Trim surrounding whitespace (\s, not the letter "s") and drop quotes.
$keyphrase =~ s/^\s+//;
$keyphrase =~ s/\s+$//;
$keyphrase =~ s/'//g;
$keyphrase =~ s/"//g;


print "\nStarting..";
#cleanup temp files (keep the ./serptemp directory itself)
rmtree( './serptemp', {keep_root => 1} );
print "\n..cleanup done";
#initialize variables
# Arrays are passed by reference (\@name): a bare @name would be flattened
# into the argument list, and an empty array would pass nothing at all.
initializeKeyVariables($keyphrase,     \@googleLinks,
                    \@googleTitles, \@yahooLinks,
                    \@yahooTitles,  \@bingLinks,
                    \@bingTitles);

#let's store all destination links found on SERPs
print "\n..getting SERPs";
getSERPResults($#googleLinks, \@googleLinks, "google");
getSERPResults($#yahooLinks,  \@yahooLinks,  "yahoo");
getSERPResults($#bingLinks,   \@bingLinks,   "bing");
print "\n..got the SERPs";

#-------------------TITLE Analysis-----------------------
# Every array below is handed to its helper by reference (\@name) so that it
# stays a single argument; a bare @name would be flattened into the list.

#get real titles
my @googleRealTitles = ( );
my @yahooRealTitles  = ( );
my @bingRealTitles   = ( );
getRealTitles($#googleLinks, \@googleRealTitles, "google");
getRealTitles($#yahooLinks,  \@yahooRealTitles,  "yahoo");
getRealTitles($#bingLinks,   \@bingRealTitles,   "bing");
print "\n..got the real titles";

#compare real titles with titles on SERPs
my @googleTitleComp = ( );
my @yahooTitleComp  = ( );
my @bingTitleComp   = ( );
my $percentMatchTitlesGoogle = compareArrays($#googleTitles,
   \@googleRealTitles, \@googleTitles, \@googleTitleComp);
my $percentMatchTitlesYahoo = compareArrays($#yahooTitles,
   \@yahooRealTitles, \@yahooTitles, \@yahooTitleComp);
my $percentMatchTitlesBing = compareArrays($#bingTitles,
   \@bingRealTitles, \@bingTitles, \@bingTitleComp);
print "\n..finished partial title comparisons";

#find keyword title matches
my @googleKeywordTitleMatch = ( );
my @yahooKeywordTitleMatch  = ( );
my @bingKeywordTitleMatch   = ( );
getKeywordsTitleMatch($keyphrase, \@googleRealTitles, $#googleRealTitles,
   \@googleKeywordTitleMatch);
getKeywordsTitleMatch($keyphrase, \@yahooRealTitles, $#yahooRealTitles,
   \@yahooKeywordTitleMatch);
getKeywordsTitleMatch($keyphrase, \@bingRealTitles, $#bingRealTitles,
   \@bingKeywordTitleMatch);
print "\n..finished keyword title comparisons";

#find if keyword in title found in page copy
my @googleKeywordTitlePageCopy = ( );
my @yahooKeywordTitlePageCopy  = ( );
my @bingKeywordTitlePageCopy   = ( );
compareTitlePageCopy($#googleRealTitles, \@googleRealTitles,
   \@googleKeywordTitlePageCopy, "google");
compareTitlePageCopy($#yahooRealTitles, \@yahooRealTitles,
   \@yahooKeywordTitlePageCopy, "yahoo");
compareTitlePageCopy($#bingRealTitles, \@bingRealTitles,
   \@bingKeywordTitlePageCopy, "bing");
print "\n..finished title page copy comparisons";


#-------------------Domain Name Analysis-----------------------

#exact match
my @googleDomainKeywordExactMatch = ( );
my @yahooDomainKeywordExactMatch  = ( );
my @bingDomainKeywordExactMatch   = ( );
my $percentDomainKeywordExactMatchGoogle = keywordDomainExactMatch($keyphrase,
   \@googleLinks, $#googleLinks, \@googleDomainKeywordExactMatch);
my $percentDomainKeywordExactMatchYahoo = keywordDomainExactMatch($keyphrase,
   \@yahooLinks, $#yahooLinks, \@yahooDomainKeywordExactMatch);
my $percentDomainKeywordExactMatchBing = keywordDomainExactMatch($keyphrase,
   \@bingLinks, $#bingLinks, \@bingDomainKeywordExactMatch);
print "\n..finished domain name exact keyword analysis";


#partial match
my @googleDomainKeywordPartialMatch = ( );
my @yahooDomainKeywordPartialMatch  = ( );
my @bingDomainKeywordPartialMatch   = ( );
# Declared with "my" like the exact-match percentages above, instead of
# being created as undeclared package globals.
my $percentDomainKeywordPartialMatchGoogle = keywordDomainPartialMatch($keyphrase,
   \@googleLinks, $#googleLinks, \@googleDomainKeywordPartialMatch);
my $percentDomainKeywordPartialMatchYahoo = keywordDomainPartialMatch($keyphrase,
   \@yahooLinks, $#yahooLinks, \@yahooDomainKeywordPartialMatch);
my $percentDomainKeywordPartialMatchBing = keywordDomainPartialMatch($keyphrase,
   \@bingLinks, $#bingLinks, \@bingDomainKeywordPartialMatch);
print "\n..finished domain name partial keyword analysis";



#-------------------Page Copy Analysis----------------------------
# Every array below is handed to its helper by reference (\@name) so that it
# stays a single argument; a bare @name would be flattened into the list.
my @googleKeywordDensity = ( );
my @yahooKeywordDensity  = ( );
my @bingKeywordDensity   = ( );

my $googleAvgDensity = keywordDensity($#googleLinks, $keyphrase,
   \@googleKeywordDensity, "google");
my $yahooAvgDensity = keywordDensity($#yahooLinks, $keyphrase,
   \@yahooKeywordDensity, "yahoo");
my $bingAvgDensity = keywordDensity($#bingLinks, $keyphrase,
   \@bingKeywordDensity, "bing");

#-------------------Description META Tag Analysis------------------
my @googleDescriptionMetaExact = ( );
my @yahooDescriptionMetaExact  = ( );
my @bingDescriptionMetaExact   = ( );

checkExactDescriptionMeta($#googleLinks, \@googleDescriptionMetaExact,
   $keyphrase, "google");
checkExactDescriptionMeta($#yahooLinks, \@yahooDescriptionMetaExact,
   $keyphrase, "yahoo");
checkExactDescriptionMeta($#bingLinks, \@bingDescriptionMetaExact,
   $keyphrase, "bing");


my @googleDescriptionMetaPartial = ( );
my @yahooDescriptionMetaPartial  = ( );
my @bingDescriptionMetaPartial   = ( );

checkPartialDescriptionMeta($#googleLinks, \@googleDescriptionMetaPartial,
   $keyphrase, "google");
checkPartialDescriptionMeta($#yahooLinks, \@yahooDescriptionMetaPartial,
   $keyphrase, "yahoo");
checkPartialDescriptionMeta($#bingLinks, \@bingDescriptionMetaPartial,
   $keyphrase, "bing");
print "\n..finished description META analysis";

#-------------------Header Tag Analysis----------------------------
my @googleNumberOfHeaderTags = ( );
my @yahooNumberOfHeaderTags  = ( );
my @bingNumberOfHeaderTags   = ( );
my @googleHeaderTagsKeywords = ( );
my @yahooHeaderTagsKeywords  = ( );
my @bingHeaderTagsKeywords   = ( );

checkHeaderTags($#googleLinks, \@googleNumberOfHeaderTags,
   \@googleHeaderTagsKeywords, "google", $keyphrase);
checkHeaderTags($#yahooLinks, \@yahooNumberOfHeaderTags,
   \@yahooHeaderTagsKeywords, "yahoo", $keyphrase);
checkHeaderTags($#bingLinks, \@bingNumberOfHeaderTags,
   \@bingHeaderTagsKeywords, "bing", $keyphrase);
print "\n..finished header tags analysis";


#-------------------Keyword Proximity Analysis---------------------
my @googleKeywordPositions = ( );
my @yahooKeywordPositions  = ( );
my @bingKeywordPositions   = ( );
my @googleKeywordPositionsList = ( );
my @yahooKeywordPositionsList  = ( );
my @bingKeywordPositionsList   = ( );
analyzeKeywordPositions($#googleLinks, \@googleKeywordPositions,
   \@googleKeywordPositionsList, "google", $keyphrase);
analyzeKeywordPositions($#yahooLinks, \@yahooKeywordPositions,
   \@yahooKeywordPositionsList, "yahoo", $keyphrase);
analyzeKeywordPositions($#bingLinks, \@bingKeywordPositions,
   \@bingKeywordPositionsList, "bing", $keyphrase);
print "\n..finished keyword proximity analysis";


#-------------------Outbound Link Analysis--------------------------
# NOTE: the Yahoo arrays in this and the next section carry an extra "K"
# in their names; any code reading them must use the same spelling.
my @googleOutboundLinkKeywords = ( );
my @yahooKOutboundLinkKeywords = ( );
my @bingOutboundLinkKeywords   = ( );
outboundLinkKeywordAnalysis($#googleLinks, \@googleLinks,
   \@googleOutboundLinkKeywords, "google", $keyphrase);
outboundLinkKeywordAnalysis($#yahooLinks, \@yahooLinks,
   \@yahooKOutboundLinkKeywords, "yahoo", $keyphrase);
outboundLinkKeywordAnalysis($#bingLinks, \@bingLinks,
   \@bingOutboundLinkKeywords, "bing", $keyphrase);
print "\n..finished outbound links analysis";


#-------------------Outbound Link PR Analysis--------------------------
my @googleOutboundLinksPR = ( );
my @yahooKOutboundLinksPR = ( );
my @bingOutboundLinksPR   = ( );
outboundLinkPRAnalysis($#googleLinks, \@googleLinks,
   \@googleOutboundLinksPR, "google", $keyphrase);
outboundLinkPRAnalysis($#yahooLinks, \@yahooLinks,
   \@yahooKOutboundLinksPR, "yahoo", $keyphrase);
outboundLinkPRAnalysis($#bingLinks, \@bingLinks,
   \@bingOutboundLinksPR, "bing", $keyphrase);
print "\n..finished outbound link PR analysis";

#-------------------Average Page Size Analysis--------------------------
# Every array below is handed to its helper by reference (\@name) so that it
# stays a single argument; a bare @name would be flattened into the list.
my @googlePageSize = ( );
my @yahooPageSize  = ( );
my @bingPageSize   = ( );
my $googleAvgPageSize = averagePageSize($#googleLinks, \@googlePageSize, "google");
my $yahooAvgPageSize  = averagePageSize($#yahooLinks,  \@yahooPageSize,  "yahoo");
my $bingAvgPageSize   = averagePageSize($#bingLinks,   \@bingPageSize,   "bing");
print "\n..finished average page size analysis";


#-------------------Optimum Number of Words Analysis--------------------
my @googleWords = ( );
my @yahooWords  = ( );
my @bingWords   = ( );
my $googleWordsPerPage = optimumWordsPerPage($#googleLinks, \@googleWords, "google");
my $yahooWordsPerPage  = optimumWordsPerPage($#yahooLinks,  \@yahooWords,  "yahoo");
my $bingWordsPerPage   = optimumWordsPerPage($#bingLinks,   \@bingWords,   "bing");
print "\n..finished optimum number of words analysis";


#-------------------Website Size Analysis-------------------------------
my @googleResultsWebsiteSizes = ( );
my @yahooResultsWebsiteSizes  = ( );
my @bingResultsWebsiteSizes   = ( );
my $googleAverageWebSize = analyzeWebsiteSize($#googleLinks, \@googleLinks,
   \@googleResultsWebsiteSizes);
my $yahooAverageWebSize  = analyzeWebsiteSize($#yahooLinks, \@yahooLinks,
   \@yahooResultsWebsiteSizes);
my $bingAverageWebSize   = analyzeWebsiteSize($#bingLinks, \@bingLinks,
   \@bingResultsWebsiteSizes);
print "\n..finished website size analysis";

#-------------------Page Age Analysis-----------------------------------
my @googlePageAge = ( );
my @yahooPageAge  = ( );
my @bingPageAge   = ( );
pageAgeAnalysis($#googleLinks, \@googleLinks, \@googlePageAge);
pageAgeAnalysis($#yahooLinks,  \@yahooLinks,  \@yahooPageAge);
pageAgeAnalysis($#bingLinks,   \@bingLinks,   \@bingPageAge);



#-------------------Create HTML Report---------------------------------

#create index file
createIndexHTML($keyphrase);

# Row limits read by the three report subroutines below.
# NOTE(review): $#array is the LAST INDEX, not the element count; a loop
# written as "$i < $numberOfLines..." therefore stops one entry short of
# the end of the array - confirm this is intended.
my $numberOfLinesGoogle = $#googleLinks;
my $numberOfLinesYahoo  = $#yahooLinks;
my $numberOfLinesBing   = $#bingLinks;

createGoogleHTMLReport();
createYahooHTMLReport();
createBingHTMLReport();


#---------------------------SUBROUTINES---------------------------
# Subroutine:
#   createGoogleHTMLReport
# Description:
#   This subroutine creates the google.html file,
#   which summarizes the Google SERP findings: a table of
#   averages followed by one detail row per result.
# Inputs:
#   None (reads the file-scoped Google averages and result arrays,
#   and $numberOfLinesGoogle as the row limit)
# Outputs:
#   Creates ./report/google.html; calls
#   printIndividualKeywordProminenceMap once per detail row.
#   Dies if the report file cannot be written.
# Returns:
#   Returns nothing
sub createGoogleHTMLReport {
   # Static markup is single-quoted so that the double quotes inside it
   # need no escaping.
   my $googleFile = '<html><head><title>Detailed Summary for Google</title>';
   $googleFile   .= '<style>';
   $googleFile   .=
'body, td, tr{font-family: "Trebuchet ms", verdana, sans-serif; font-size:9px;}';
   $googleFile   .=
'b{font-family: "Trebuchet ms", verdana, sans-serif;font-size:10px;}';
   $googleFile   .= '</style>';
   $googleFile   .= '</head>';
   $googleFile   .= '<body><h1>Ranking Report Summary</h1>';
   $googleFile   .= '<br>';
   $googleFile   .=
'<table border="1" width="500" cellspacing="2" cellpadding="2">';
   $googleFile   .= '<tr><td colspan=2><b>Averages</b></td>';
   $googleFile   .= '</tr>';

   #summary table: one [label, formatted value] pair per row
   my @summaryRows = (
      [ '% Title Match',
        sprintf("%.1f", $percentMatchTitlesGoogle) ],
      [ '% Keyword Domain Exact Match',
        sprintf("%.1f", $percentDomainKeywordExactMatchGoogle) ],
      [ '% Keyword Domain Partial Match',
        sprintf("%.1f", $percentDomainKeywordPartialMatchGoogle) ],
      [ '% Keyword Density',
        sprintf("%.1f", $googleAvgDensity) ],
      [ 'Page Size [bytes]',
        sprintf("%.0f", $googleAvgPageSize) ],
      [ 'Words Per Page',
        sprintf("%.0f", $googleWordsPerPage) ],
      [ 'Website Size [of base url]',
        scalar round($googleAverageWebSize) ],
   );
   foreach my $row (@summaryRows) {
      $googleFile .= "<tr><td><b>$row->[0]</b></td><td>$row->[1]</td></tr>";
   }
   $googleFile   .= "</table><br><br>";

   #detail table header
   $googleFile   .= "<b>Detail Table</b> <br>";
   $googleFile   .= "<table border=1 cellpadding=2 cellspacing=2>";
   $googleFile   .= "<tr>" . join('',
      "<td nowrap>#</td>",
      "<td width='100'><b>URL</b></td>",
      "<td nowrap width='150'><b>Google Title</b></td>",
      "<td nowrap width='150'><b>Page Title</b></td>",
      "<td nowrap><b>Keyword(s) found<br> in Title? [Y|N]</b></td>",
      "<td nowrap><b>Title Keywords <br>In Page Copy [%]</b></td>",
      "<td nowrap><b>Domain name <br>Exact Match</b></td>",
      "<td nowrap><b>Domain name <br>Partial Match</b></td>",
      "<td nowrap><b>Keyword Density</b></td>",
      "<td nowrap><b>META Description<br> Exact Match</b></td>",
      "<td nowrap><b>META Description<br> Partial Match</b></td>",
      "<td nowrap><b>Header Tags</b></td>",
      "<td nowrap><b>Header Tag <br>Keywords</b></td>",
      "<td nowrap width='350'><b>Keyword Positions in Page</b></td>",
      "<td nowrap><b>Keyword Prominence Map</b></td>",
      "<td nowrap><b>Outbound Links with Keywords</b></td>",
      "<td nowrap width='150'><b>Outbound Link<br> PRs</b></td>",
      "<td nowrap><b>Page Size <br>[bytes]</b></td>",
      "<td nowrap><b>Words in<br> Page</b></td>",
      "<td nowrap><b>Website Size</b></td>",
      "<td nowrap><b>Page Age</b></td>",
   ) . "</tr>";

   #detail rows
   for (my $i=0; $i < $numberOfLinesGoogle; $i++) {
      # The pipe must be escaped: a bare | is an empty alternation that
      # matches between every character of the string.
      my $positions = $googleKeywordPositionsList[$i];
      $positions =~ s/\|/, /g;

      # Array passed by reference so the engine name stays the third
      # argument instead of following a flattened list.
      printIndividualKeywordProminenceMap($i, \@googleKeywordPositions, "google");

      # Cells before and after the "Map" link column, in column order.
      my @leadingCells = (
         $i,
         $googleLinks[$i],
         $googleTitles[$i],
         $googleRealTitles[$i],
         $googleKeywordTitleMatch[$i],
         sprintf("%.1f", $googleKeywordTitlePageCopy[$i]),
         $googleDomainKeywordExactMatch[$i],
         $googleDomainKeywordPartialMatch[$i],
         sprintf("%.3f", $googleKeywordDensity[$i]),
         $googleDescriptionMetaExact[$i],
         $googleDescriptionMetaPartial[$i],
         $googleNumberOfHeaderTags[$i],
         $googleHeaderTagsKeywords[$i],
         $positions,
      );
      my @trailingCells = (
         $googleOutboundLinkKeywords[$i],
         $googleOutboundLinksPR[$i],
         $googlePageSize[$i],
         $googleWords[$i],
         $googleResultsWebsiteSizes[$i],
         $googlePageAge[$i],
      );

      $googleFile .= "<tr>";
      foreach my $cell (@leadingCells) {
         $googleFile .= "<td align=left>$cell&nbsp;</td>";
      }
      $googleFile .=
"<td align=left><a href='./maps/google".$i.".html'>Map</a></td>";
      foreach my $cell (@trailingCells) {
         $googleFile .= "<td align=left>$cell&nbsp;</td>";
      }
      $googleFile .= "</tr>";
   }
   # Close the elements opened above so the document is well formed.
   $googleFile .= "</table></body></html>";

   my $filename = "./report/google.html";
   open my $fh, ">", $filename or die "Cannot write $filename: $!";
   print {$fh} $googleFile;
   # Buffered write errors only surface at close.
   close $fh or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   createYahooHTMLReport
# Description:
#   This subroutine creates the yahoo.html file,
#   which summarizes the Yahoo SERP findings: a table of
#   averages followed by one detail row per result.
# Inputs:
#   None (reads the file-scoped Yahoo averages and result arrays,
#   and $numberOfLinesYahoo as the row limit)
# Outputs:
#   Creates ./report/yahoo.html; calls
#   printIndividualKeywordProminenceMap once per detail row.
#   Dies if the report file cannot be written.
# Returns:
#   Returns nothing
sub createYahooHTMLReport {
   # Static markup is single-quoted so that the double quotes inside it
   # need no escaping.
   my $yahooFile = '<html><head><title>Detailed Summary for Yahoo</title>';
   $yahooFile   .= '<style>';
   $yahooFile   .=
'body, td, tr{font-family: "Trebuchet ms", verdana, sans-serif; font-size:9px;}';
   $yahooFile   .=
'b{font-family: "Trebuchet ms", verdana, sans-serif;font-size:10px;}';
   $yahooFile   .= '</style>';
   $yahooFile   .= '</head>';
   $yahooFile   .= '<body><h1>Ranking Report Summary</h1>';
   $yahooFile   .= '<br>';
   $yahooFile   .=
'<table border="1" width="500" cellspacing="2" cellpadding="2">';
   $yahooFile   .= '<tr><td colspan=2><b>Averages</b></td>';
   $yahooFile   .= '</tr>';

   #summary table: one [label, formatted value] pair per row
   my @summaryRows = (
      [ '% Title Match',
        sprintf("%.1f", $percentMatchTitlesYahoo) ],
      [ '% Keyword Domain Exact Match',
        sprintf("%.1f", $percentDomainKeywordExactMatchYahoo) ],
      [ '% Keyword Domain Partial Match',
        sprintf("%.1f", $percentDomainKeywordPartialMatchYahoo) ],
      [ '% Keyword Density',
        sprintf("%.1f", $yahooAvgDensity) ],
      [ 'Page Size [bytes]',
        sprintf("%.0f", $yahooAvgPageSize) ],
      [ 'Words Per Page',
        sprintf("%.0f", $yahooWordsPerPage) ],
      [ 'Website Size [of base url]',
        scalar round($yahooAverageWebSize) ],
   );
   foreach my $row (@summaryRows) {
      $yahooFile .= "<tr><td><b>$row->[0]</b></td><td>$row->[1]</td></tr>";
   }
   $yahooFile   .= "</table><br><br>";

   #detail table header
   $yahooFile   .= "<b>Detail Table</b> <br>";
   $yahooFile   .= "<table border=1 cellpadding=2 cellspacing=2>";
   $yahooFile   .= "<tr>" . join('',
      "<td nowrap>#</td>",
      "<td width='100'><b>URL</b></td>",
      "<td nowrap width='150'><b>Yahoo Title</b></td>",
      "<td nowrap width='150'><b>Page Title</b></td>",
      "<td nowrap><b>Keyword(s) found<br> in Title? [Y|N]</b></td>",
      "<td nowrap><b>Title Keywords <br>In Page Copy [%]</b></td>",
      "<td nowrap><b>Domain name <br>Exact Match</b></td>",
      "<td nowrap><b>Domain name <br>Partial Match</b></td>",
      "<td nowrap><b>Keyword Density</b></td>",
      "<td nowrap><b>META Description<br> Exact Match</b></td>",
      "<td nowrap><b>META Description<br> Partial Match</b></td>",
      "<td nowrap><b>Header Tags</b></td>",
      "<td nowrap><b>Header Tag <br>Keywords</b></td>",
      "<td nowrap width='350'><b>Keyword Positions in Page</b></td>",
      "<td nowrap><b>Keyword Prominence Map</b></td>",
      "<td nowrap><b>Outbound Links with Keywords</b></td>",
      "<td nowrap width='150'><b>Outbound Link<br> PRs</b></td>",
      "<td nowrap><b>Page Size <br>[bytes]</b></td>",
      "<td nowrap><b>Words in<br> Page</b></td>",
      "<td nowrap><b>Website Size</b></td>",
      "<td nowrap><b>Page Age</b></td>",
   ) . "</tr>";

   #detail rows
   for (my $i=0; $i < $numberOfLinesYahoo; $i++) {
      # The pipe must be escaped: a bare | is an empty alternation that
      # matches between every character of the string.
      my $positions = $yahooKeywordPositionsList[$i];
      $positions =~ s/\|/, /g;

      # Array passed by reference so the engine name stays the third
      # argument instead of following a flattened list.
      printIndividualKeywordProminenceMap($i, \@yahooKeywordPositions, "yahoo");

      # Cells before and after the "Map" link column, in column order.
      my @leadingCells = (
         $i,
         $yahooLinks[$i],
         $yahooTitles[$i],
         $yahooRealTitles[$i],
         $yahooKeywordTitleMatch[$i],
         sprintf("%.1f", $yahooKeywordTitlePageCopy[$i]),
         $yahooDomainKeywordExactMatch[$i],
         $yahooDomainKeywordPartialMatch[$i],
         sprintf("%.3f", $yahooKeywordDensity[$i]),
         $yahooDescriptionMetaExact[$i],
         $yahooDescriptionMetaPartial[$i],
         $yahooNumberOfHeaderTags[$i],
         $yahooHeaderTagsKeywords[$i],
         $positions,
      );
      # The outbound-link arrays are declared by the main script as
      # @yahooKOutboundLinkKeywords and @yahooKOutboundLinksPR (with a
      # "K"); the K-less names are never populated.
      my @trailingCells = (
         $yahooKOutboundLinkKeywords[$i],
         $yahooKOutboundLinksPR[$i],
         $yahooPageSize[$i],
         $yahooWords[$i],
         $yahooResultsWebsiteSizes[$i],
         $yahooPageAge[$i],
      );

      $yahooFile .= "<tr>";
      foreach my $cell (@leadingCells) {
         $yahooFile .= "<td align=left>$cell&nbsp;</td>";
      }
      $yahooFile .=
"<td align=left><a href='./maps/yahoo".$i.".html'>Map</a></td>";
      foreach my $cell (@trailingCells) {
         $yahooFile .= "<td align=left>$cell&nbsp;</td>";
      }
      $yahooFile .= "</tr>";
   }
   # Close the elements opened above so the document is well formed.
   $yahooFile .= "</table></body></html>";

   my $filename = "./report/yahoo.html";
   open my $fh, ">", $filename or die "Cannot write $filename: $!";
   print {$fh} $yahooFile;
   # Buffered write errors only surface at close.
   close $fh or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   createBingHTMLReport
# Description:
#   This subroutine creates the bing.html file,
#   which summarizes the Bing SERP findings: a table of
#   averages followed by one detail row per result.
# Inputs:
#   None (reads the file-scoped Bing averages and result arrays,
#   and $numberOfLinesBing as the row limit)
# Outputs:
#   Creates ./report/bing.html; calls
#   printIndividualKeywordProminenceMap once per detail row.
#   Dies if the report file cannot be written.
# Returns:
#   Returns nothing
sub createBingHTMLReport {
   # Static markup is single-quoted so that the double quotes inside it
   # need no escaping.
   my $bingFile = '<html><head><title>Detailed Summary for Bing</title>';
   $bingFile   .= '<style>';
   $bingFile   .=
'body, td, tr{font-family: "Trebuchet ms", verdana, sans-serif; font-size:9px;}';
   $bingFile   .=
'b{font-family: "Trebuchet ms", verdana, sans-serif;font-size:10px;}';
   $bingFile   .= '</style>';
   $bingFile   .= '</head>';
   $bingFile   .= '<body><h1>Ranking Report Summary</h1>';
   $bingFile   .= '<br>';
   $bingFile   .=
'<table border="1" width="500" cellspacing="2" cellpadding="2">';
   $bingFile   .= '<tr><td colspan=2><b>Averages</b></td>';
   $bingFile   .= '</tr>';

   #summary table: one [label, formatted value] pair per row
   my @summaryRows = (
      [ '% Title Match',
        sprintf("%.1f", $percentMatchTitlesBing) ],
      [ '% Keyword Domain Exact Match',
        sprintf("%.1f", $percentDomainKeywordExactMatchBing) ],
      [ '% Keyword Domain Partial Match',
        sprintf("%.1f", $percentDomainKeywordPartialMatchBing) ],
      [ '% Keyword Density',
        sprintf("%.1f", $bingAvgDensity) ],
      [ 'Page Size [bytes]',
        sprintf("%.0f", $bingAvgPageSize) ],
      [ 'Words Per Page',
        sprintf("%.0f", $bingWordsPerPage) ],
      [ 'Website Size [of base url]',
        scalar round($bingAverageWebSize) ],
   );
   foreach my $row (@summaryRows) {
      $bingFile .= "<tr><td><b>$row->[0]</b></td><td>$row->[1]</td></tr>";
   }
   $bingFile   .= "</table><br><br>";

   #detail table header
   $bingFile   .= "<b>Detail Table</b> <br>";
   $bingFile   .= "<table border=1 cellpadding=2 cellspacing=2>";
   $bingFile   .= "<tr>" . join('',
      "<td nowrap>#</td>",
      "<td width='100'><b>URL</b></td>",
      "<td nowrap width='150'><b>Bing Title</b></td>",
      "<td nowrap width='150'><b>Page Title</b></td>",
      "<td nowrap><b>Keyword(s) found<br> in Title? [Y|N]</b></td>",
      "<td nowrap><b>Title Keywords <br>In Page Copy [%]</b></td>",
      "<td nowrap><b>Domain name <br>Exact Match</b></td>",
      "<td nowrap><b>Domain name <br>Partial Match</b></td>",
      "<td nowrap><b>Keyword Density</b></td>",
      "<td nowrap><b>META Description<br> Exact Match</b></td>",
      "<td nowrap><b>META Description<br> Partial Match</b></td>",
      "<td nowrap><b>Header Tags</b></td>",
      "<td nowrap><b>Header Tag <br>Keywords</b></td>",
      "<td nowrap width='350'><b>Keyword Positions in Page</b></td>",
      "<td nowrap><b>Keyword Prominence Map</b></td>",
      "<td nowrap><b>Outbound Links with Keywords</b></td>",
      "<td nowrap width='150'><b>Outbound Link<br> PRs</b></td>",
      "<td nowrap><b>Page Size <br>[bytes]</b></td>",
      "<td nowrap><b>Words in<br> Page</b></td>",
      "<td nowrap><b>Website Size</b></td>",
      "<td nowrap><b>Page Age</b></td>",
   ) . "</tr>";

   #detail rows
   for (my $i=0; $i < $numberOfLinesBing; $i++) {
      # The pipe must be escaped: a bare | is an empty alternation that
      # matches between every character of the string.
      my $positions = $bingKeywordPositionsList[$i];
      $positions =~ s/\|/, /g;

      # Array passed by reference so the engine name stays the third
      # argument instead of following a flattened list.
      printIndividualKeywordProminenceMap($i, \@bingKeywordPositions, "bing");

      # Cells before and after the "Map" link column, in column order.
      my @leadingCells = (
         $i,
         $bingLinks[$i],
         $bingTitles[$i],
         $bingRealTitles[$i],
         $bingKeywordTitleMatch[$i],
         sprintf("%.1f", $bingKeywordTitlePageCopy[$i]),
         $bingDomainKeywordExactMatch[$i],
         $bingDomainKeywordPartialMatch[$i],
         sprintf("%.3f", $bingKeywordDensity[$i]),
         $bingDescriptionMetaExact[$i],
         $bingDescriptionMetaPartial[$i],
         $bingNumberOfHeaderTags[$i],
         $bingHeaderTagsKeywords[$i],
         $positions,
      );
      my @trailingCells = (
         $bingOutboundLinkKeywords[$i],
         $bingOutboundLinksPR[$i],
         $bingPageSize[$i],
         $bingWords[$i],
         $bingResultsWebsiteSizes[$i],
         $bingPageAge[$i],
      );

      $bingFile .= "<tr>";
      foreach my $cell (@leadingCells) {
         $bingFile .= "<td align=left>$cell&nbsp;</td>";
      }
      $bingFile .= "<td align=left><a href='./maps/bing".$i.".html'>Map</a></td>";
      foreach my $cell (@trailingCells) {
         $bingFile .= "<td align=left>$cell&nbsp;</td>";
      }
      $bingFile .= "</tr>";
   }
   # Close the elements opened above so the document is well formed.
   $bingFile .= "</table></body></html>";

   my $filename = "./report/bing.html";
   open my $fh, ">", $filename or die "Cannot write $filename: $!";
   print {$fh} $bingFile;
   # Buffered write errors only surface at close.
   close $fh or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   createIndexHTML
# Description:
#   This subroutine creates the HTML for the report index file. The page
#   shows the keyword, three links that load google.html, yahoo.html or
#   bing.html into an iframe, and the iframe itself.
# Inputs:
#   $keyword => keyword
# Outputs:
#   Creates ./report/index.html (dies if the file cannot be written)
# Returns:
#   Returns nothing
sub createIndexHTML {
   my $keyword = shift;

   # qq{} is used for every fragment that contains double quotes, so the
   # HTML attribute quotes cannot terminate the Perl string.
   my $indexFile = "<html><head><title>Ranking Report Summary</title></head>";
   $indexFile   .= "<body><center><strong>Ranking Report Summary";
   $indexFile   .= qq{ (for "$keyword") <br><br>};
   $indexFile   .=
      qq{<a href="#" onclick="document.all.myiframe.src='google.html'">};
   $indexFile   .= "Google</a> |";
   $indexFile   .=
      qq{<a href="#" onclick="document.all.myiframe.src='yahoo.html'">};
   $indexFile   .= "Yahoo!</a> |";
   $indexFile   .=
      qq{<a href="#" onclick="document.all.myiframe.src='bing.html'">};
   $indexFile   .= "Bing Search</a><br><br>";
   $indexFile   .= "Click on Links to View Summary..<br><br>";
   $indexFile   .=
      qq{<iframe name="myiframe" width=5000 height=6000 border="0" frameborder="0">};
   $indexFile   .= "</iframe></center></body></html>";

   # Lexical handle: a bareword FILE would be shared with every other
   # subroutine that uses the same name.
   my $filename = "./report/index.html";
   open my $fh, ">", $filename or die "Cannot open $filename: $!";
   print {$fh} $indexFile;
   close $fh or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   pageAgeAnalysis
# Description:
#   This subroutine requests every URL in the source array and records
#   the Last-Modified response header as a local time string.
# Inputs:
#   $numberOfElements => number of URLs to process
#   $srcArr => array (reference) to links array
#   $destArr => array (reference) to result array
# Outputs:
#   none
# Returns:
#   Returns nothing; results are stored in $destArr
sub pageAgeAnalysis {
   my ($numberOfElements, $srcArr, $destArr) = @_;

   # One user agent serves all requests.
   my $ua = LWP::UserAgent->new;
   $ua->agent("Mozilla/3.0 (compatible)");

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $request  = HTTP::Request->new("GET", "$srcArr->[$i]");
      my $response = $ua->request($request);

      # last_modified is undef when the server sends no Last-Modified
      # header; localtime(undef) would report the Unix epoch as the page
      # age, so record the age as unknown instead.
      my $lastModified = $response->last_modified;
      $destArr->[$i] = defined $lastModified
                     ? scalar(localtime($lastModified))
                     : "unknown";
   }
   return;
}


# Subroutine:
#   analyzeWebsiteSize
# Description:
#   This subroutine runs a Google "site:" query for the base URL of each
#   link and reads the number that follows the text "of about " in the
#   result page.
#   NOTE(review): the "of about <number>" wording is what this code has
#   always looked for; confirm it still matches the live result page.
# Inputs:
#   $numberOfElements => number of links to process
#   $srcArr => array (reference) to links array
#   $destArr => array (reference) to result array (one size per link)
# Outputs:
#   Prints a message when the result page could not be saved
# Returns:
#   Returns average site size (0 when no size could be read)
sub analyzeWebsiteSize {
   my ($numberOfElements, $srcArr, $destArr) = @_;
   my $ua = LWP::UserAgent->new;
   $ua->timeout(25);
   $ua->agent("Mozilla/3.0 (compatible)");
   my $filename = "./serptemp/temp.txt";
   my $total = 0;

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $url = $srcArr->[$i];

      #let's get the base URL first
      if ($url =~ /^http/) {
         my @tmparr1 = split(m{//}, $url);
         my $afterScheme = defined $tmparr1[1] ? $tmparr1[1] : "";
         my @tmparr2 = split(m{/}, $afterScheme);
         my $baseurl = ($#tmparr2 > 0) ? $tmparr2[0] : $afterScheme;
         $baseurl =~ s{/$}{};
         $url = $baseurl;
      }

      # compose "site:" link
      my $tmpurl =
         'http://www.google.com/search?hl=en&q=site%3A' . $url . '&btnG=Search';

      # random pause between queries
      my $randNum = int(rand(5));
      sleep($randNum);

      # The temp file is shared by all iterations; remove the previous
      # download so a failed request cannot be read as this URL's result.
      unlink $filename;
      $ua->get("$tmpurl", ':content_file' => "$filename");

      #get the google SERP pagecopy variable
      my $pageCopy = "";
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         while (my $token = $p->get_tag("body")) {
            $pageCopy = $p->get_text("/body");
         }
      } else {
         print "\nfile does not exist";
      }

      # pick up the number after "of about " and drop its commas;
      # a page without that text counts as size 0
      my $size = 0;
      if ($pageCopy =~ /of about (\d[\d,]*)/) {
         $size = $1;
         $size =~ s/,//g;
      }

      # store it for that URL
      $destArr->[$i] = $size;
      $total += $size;
   }

   #calculate and return the average
   return ($total > 0) ? ($total / $numberOfElements) : 0;
}


# Subroutine:
#   optimumWordsPerPage
# Description:
#   This subroutine loops through all files, strips the HTML and records
#   the number of whitespace-separated words in the destination array.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array (word count per file)
#   $prefix => SE file prefix
# Outputs:
#   none
# Returns:
#   Returns average words per page (0 when there are no files)
sub optimumWordsPerPage {
   my ($numberOfElements, $destArr, $prefix) = @_;
   my $total = 0;
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $tree = HTML::TreeBuilder->new;
      $tree->parse_file("$filename");
      my $non_html = $tree->as_text();
      # explicit delete releases the tree, as the module documents
      $tree->delete;

      # split ' ' skips leading whitespace and treats any run of
      # whitespace as one separator, so empty fields are never counted
      my @words = split(' ', defined $non_html ? $non_html : '');
      $destArr->[$i] = scalar @words;
      $total += scalar @words;
   }
   # avoid "Illegal division by zero" when nothing was processed
   return 0 unless $numberOfElements > 0;
   return ($total / $numberOfElements);
}


# Subroutine:
#   averagePageSize
# Description:
#   This subroutine loops through all files to record
#   page sizes (in bytes) in destination array.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array (size per file)
#   $prefix => SE file prefix
# Outputs:
#   none
# Returns:
#   Returns average page size (0 when there are no files)
sub averagePageSize {
   my ($numberOfElements, $destArr, $prefix) = @_;
   # nothing to average; also avoids "Illegal division by zero"
   return 0 unless $numberOfElements > 0;

   my $total = 0;
   for my $i (0 .. $numberOfElements - 1) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      # -s yields undef for a missing file; count it as 0 bytes
      my $filesize = -s $filename;
      $filesize = 0 unless defined $filesize;
      $destArr->[$i] = $filesize;
      $total += $filesize;
   }
   return ($total / $numberOfElements);
}


# Subroutine:
#   outboundLinkPRAnalysis
# Description:
#   This subroutine looks up a PR value for the host of every
#   external (outbound) link found in each saved page. Each host is
#   looked up once per page.
# Inputs:
#   $numberOfElements => number of files to process
#   $srcLinksArr => array (reference) to links array
#   $destArr => array (reference) to result array; each element is a
#               "|"-terminated list of PR values
#   $prefix => SE file prefix
# Outputs:
#   none
# Returns:
#   No returns
sub outboundLinkPRAnalysis {
   my ($numberOfElements, $srcLinksArr, $destArr, $prefix) = @_;
   my $PRURL = 'http://www.seowarrior.net/scripts/pr.php?pr=';
   my $range = 2;

   # Text that follows the first "//" of a URL (host plus path).
   my $afterScheme = sub {
      my ($link) = @_;
      my @parts = split(m{//}, defined $link ? $link : '');
      return defined $parts[1] ? $parts[1] : '';
   };
   # Key used to tell internal from external links: the first two
   # dot-separated labels after the scheme, concatenated.
   my $siteKey = sub {
      my @labels = split(/\./, $afterScheme->($_[0]));
      return join('', grep { defined } @labels[0, 1]);
   };

   #loop through each file
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my %linkHash = ();
      my $PRs = "";
      #check for file existence
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         while (my $token = $p->get_tag("a")) {
            my $url = $token->[1]{href} || "-";
            # read past the anchor text, as the code always did
            $p->get_trimmed_text("/a");

            #check if link internal or external
            next unless $url =~ /^http/;
            next if $siteKey->($url) eq $siteKey->($srcLinksArr->[$i]);

            # host = everything before the first "/" after the scheme
            my $baseurl = $afterScheme->($url);
            $baseurl =~ s{/.*$}{}s;
            next if exists $linkHash{$baseurl};

            #obtain PR value / use random sleep
            my $randNum = int(rand($range));
            sleep($randNum);

            my $PR = get($PRURL . $baseurl);
            # get() returns undef when the request fails
            $PR = "" unless defined $PR;
            $PR =~ s/\n//g;
            $PRs .= $PR . "|";
            $linkHash{$baseurl} = 1;
         }
      }
      $destArr->[$i] = $PRs;
   }
   return;
}


# Subroutine:
#   outboundLinkKeywordAnalysis
# Description:
#   This subroutine counts, for each saved page, the external (outbound)
#   links whose anchor text contains the keyword. For a multi-word
#   keyword a link counts when any one of the words is found.
# Inputs:
#   $numberOfElements => number of files to process
#   $srcLinksArr => array (reference) to links array
#   $destArr => array (reference) to result array (count per file)
#   $prefix => SE file prefix
#   $keyword => keyword
# Outputs:
#   none
# Returns:
#   No returns
sub outboundLinkKeywordAnalysis {
   my ($numberOfElements, $srcLinksArr, $destArr, $prefix, $keyword) = @_;
   # empty fragments (doubled spaces) are dropped: an empty pattern
   # would not behave as a plain "always matches" test in Perl
   my @keywordFragments =
      grep { length } split(/ /, defined $keyword ? $keyword : '');

   # Key used to tell internal from external links: the first two
   # dot-separated labels after the "//" of the URL, concatenated.
   my $siteKey = sub {
      my ($link) = @_;
      my @parts  = split(m{//}, defined $link ? $link : '');
      my @labels = split(/\./, defined $parts[1] ? $parts[1] : '');
      return join('', grep { defined } @labels[0, 1]);
   };

   #loop through each file
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $foundCount = 0;
      #check for file existence
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         while (my $token = $p->get_tag("a")) {
            #get link and anchor text
            my $url = $token->[1]{href} || "-";
            my $text = $p->get_trimmed_text("/a");
            # strip every quote character from the anchor text
            $text =~ tr/"'//d;

            #check if link internal or external
            next unless $url =~ /^http/;
            next if $siteKey->($url) eq $siteKey->($srcLinksArr->[$i]);

            #external link..process it; \Q..\E keeps characters such
            #as "+" or "(" in the keyword from being read as regex syntax
            for my $fragment (@keywordFragments) {
               if ($text =~ /\Q$fragment\E/i) {
                  $foundCount++;
                  last;
               }
            }
         }
      }
      $destArr->[$i] = $foundCount;
   }
   return;
}


# Subroutine:
#   printKeywordProminenceMap
# Description:
#   This subroutine prints each URL map, one per line
# Inputs:
#   $srcArr => array (reference) to result array
#   $numberOfElements => number of maps to print
# Outputs:
#   prints the keyword map
# Returns:
#   No returns
sub printKeywordProminenceMap {
   my ($srcArr, $numberOfElements) = @_;
   # index with the loop counter so every map is printed, not the
   # first one repeated
   for (my $i = 0; $i < $numberOfElements; $i++) {
      print "$srcArr->[$i]\n";
   }
   return;
}


# Subroutine:
#   printIndividualKeywordProminenceMap
# Description:
#   This subroutine writes the keyword prominence map of one URL to its
#   own HTML file
# Inputs:
#   $index => index of the map within $srcArr (also used in the filename)
#   $srcArr => array (reference) to the keyword maps
#   $prefix => SE file prefix
# Outputs:
#   Creates ./report/maps/<prefix><index>.html (dies if it cannot be written)
# Returns:
#   No returns
sub printIndividualKeywordProminenceMap {
   my ($index, $srcArr, $prefix) = @_;
   my $filename = "./report/maps/$prefix" . $index . ".html";

   # lexical handle instead of the shared bareword FILE
   open my $fh, ">", $filename or die "Cannot open $filename: $!";
   print {$fh} "<html><head><title>\n";
   print {$fh} "Keyword Prominence Map\n";
   print {$fh} "</title></head>\n";
   print {$fh} "<body><table width=400 cellpadding=2 cellspacing=0><tr><td width=400>";
   print {$fh} $srcArr->[$index];
   print {$fh} "</td></tr></table></body></html>";
   close $fh or die "Cannot close $filename: $!";
   return;
}


# Subroutine:
#   analyzeKeywordPositions
# Description:
#   This subroutine analyzes relative positions of keywords within a page copy.
#   For every word of the page it appends a marker to the map in $destArr:
#   the index of the matching keyword fragment, or "*" for no match. The
#   word positions of all matches are appended to $destArr2 as "pos|".
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array (keyword maps)
#   $destArr2 => array (reference) to result array (match positions)
#   $prefix => file prefix
#   $keyword => keyword to analyze
# Outputs:
#   Prints a message for every file that does not exist
# Returns:
#   No returns all work done on arrays
sub analyzeKeywordPositions {
   my ($numberOfElements, $destArr, $destArr2, $prefix, $keyword) = @_;
   my @keywordFragments = split(/ /, $keyword);
   #loop through each file to get
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $pageCopy = "";
      my $filename = './serptemp/' . $prefix . "$i.txt";
      #check for file existence
      if (!-e "$filename") {
         print "\nfile does not exist";
         next;
      }

      my $p = HTML::TokeParser->new($filename);
      #get pageCopy for this file
      while (my $token = $p->get_tag("body")) {
         $pageCopy = $p->get_trimmed_text("/body");
         $pageCopy = cleanText($pageCopy);
      }
      $pageCopy =~ s/\s+/ /g;
      my @tempArr = split(/ /, $pageCopy);

      #loop through all words, including the last one ($#tempArr is
      #the last index, not the word count)
      for (my $j = 0; $j <= $#tempArr; $j++) {
         my $flag = "N";
         if ($#keywordFragments > 0) {
            #handle multi keywords
            for (my $k = 0; $k <= $#keywordFragments; $k++) {
               my $fragment = $keywordFragments[$k];
               #check for a match; \Q..\E matches the fragment literally
               #and empty fragments (doubled spaces) never match
               if (length($fragment) && $tempArr[$j] =~ /\Q$fragment\E/i) {
                  #update destination variable with index of keyword array
                  $destArr->[$i] .= "$k ";
                  #update destination variable with relative position
                  $destArr2->[$i] .= "$j" . "|";
                  $flag = "Y";
                  last;
               } elsif ($k == $#keywordFragments) {
                  #no fragment matched this word
                  $destArr->[$i] .= "* ";
               }
            }
         } else {
            #handle single keyword
            $tempArr[$j] =~ s/"//;
            $tempArr[$j] =~ s/'//;

            if (length($keyword) && $tempArr[$j] =~ /\Q$keyword\E/i) {
               $destArr->[$i] .= "0 ";
               $destArr2->[$i] .= "$j" . "|";
               $flag = "Y";
            } else {
               $destArr->[$i] .= "* ";
            }
         }
         # NOTE(review): a matched word is followed by an extra "* "
         # marker. Kept as found; confirm this is intended.
         if ($flag ne "N") {
            $destArr->[$i] .= "* ";
         }
      }
   }
   return;
}


# Subroutine:
#   checkHeaderTags
# Description:
#   This subroutine checks use of heading tags (h1..h6) in addition to
#   checking for keyword use in the same tags.
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr1 => array (reference) to result array; each element is the
#                number of h1..h6 tags as "n|n|n|n|n|n"
#   $destArr2 => array (reference) to result array; each element flags
#                keyword use in h1..h6 as "Y|N|..." (six flags)
#   $prefix => file prefix
#   $keyword => keyword to analyze
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on arrays
sub checkHeaderTags {
   my ($numberOfElements, $destArr1, $destArr2, $prefix, $keyword) = @_;
   # empty fragments (doubled spaces) are dropped: an empty pattern
   # would not behave as a plain "always matches" test in Perl
   my @keywordFragments =
      grep { length } split(/ /, defined $keyword ? $keyword : '');
   my @levels = qw(h1 h2 h3 h4 h5 h6);

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      my $p = (-e "$filename") ? HTML::TokeParser->new($filename) : undef;
      if (!$p) {
         # no (readable) file => insert defaults
         $destArr1->[$i] = join('|', (0)   x @levels);
         $destArr2->[$i] = join('|', ('N') x @levels);
         next;
      }

      # Collect the text of every heading per level. Keeping a list
      # avoids joining the texts with a separator string and splitting
      # them again.
      my %headingText = map { $_ => [] } @levels;
      while (my $token = $p->get_token) {
         next unless $token->[0] eq 'S' && exists $headingText{ $token->[1] };
         my $tag = $token->[1];
         push @{ $headingText{$tag} }, cleanText($p->get_text("/$tag"));
      }

      my (@counts, @flags);
      for my $tag (@levels) {
         my @texts = @{ $headingText{$tag} };
         push @counts, scalar @texts;

         # "Y" when any heading of this level contains the keyword
         # (any one word of a multi-word keyword is enough)
         my $flag = "N";
         HEADING: for my $text (@texts) {
            for my $fragment (@keywordFragments) {
               if ($text =~ /\Q$fragment\E/i) {
                  $flag = "Y";
                  last HEADING;
               }
            }
         }
         push @flags, $flag;
      }
      $destArr1->[$i] = join('|', @counts);
      $destArr2->[$i] = join('|', @flags);
   }
   return;
}


# Subroutine:
#   checkExactDescriptionMeta
# Description:
#   This subroutine checks for exact keyword match in the META
#   description: the whole keyword phrase must appear in the description
#   (case-insensitively).
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array ("Y" or "N" per file)
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on array
sub checkExactDescriptionMeta {
   my ($numberOfElements, $destArr, $keyword, $prefix) = @_;
   my $haveKeyword = defined $keyword && length $keyword;

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         while (my $token = $p->get_tag("meta")) {
            # meta tags without a name attribute are skipped
            my $name = $token->[1]{name};
            next unless defined $name && $name =~ /description/i;

            my $metaDescription = $token->[1]{content};
            $metaDescription = "" unless defined $metaDescription;
            $metaDescription =~ tr/"'//d;

            # \Q..\E matches the keyword literally
            if ($haveKeyword && $metaDescription =~ /\Q$keyword\E/i) {
               $destArr->[$i] = "Y";
               # a later description tag must not reset the match
               last;
            }
            $destArr->[$i] = "N";
         }
      }
      # no file or no description tag
      if (!defined $destArr->[$i]) {
         $destArr->[$i] = "N";
      }
   }
   return;
}


# Subroutine:
#   checkPartialDescriptionMeta
# Description:
#   This subroutine checks for partial keyword match in the META
#   description: at least one word of the keyword must appear in the
#   description (case-insensitively).
# Inputs:
#   $numberOfElements => number of files to process
#   $destArr => array (reference) to result array ("Y" or "N" per file)
#   $keyword => keyword to analyze
#   $prefix => file prefix
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on array
sub checkPartialDescriptionMeta {
   my ($numberOfElements, $destArr, $keyword, $prefix) = @_;
   # empty fragments (doubled spaces) are dropped: an empty pattern
   # would not behave as a plain "always matches" test in Perl
   my @keywordFragments =
      grep { length } split(/ /, defined $keyword ? $keyword : '');

   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $filename = './serptemp/' . $prefix . "$i.txt";
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         META: while (my $token = $p->get_tag("meta")) {
            # meta tags without a name attribute are skipped
            my $name = $token->[1]{name};
            next META unless defined $name && $name =~ /description/i;

            my $metaDescription = $token->[1]{content};
            $metaDescription = "" unless defined $metaDescription;

            $destArr->[$i] = "N";
            for my $fragment (@keywordFragments) {
               if ($metaDescription =~ /\Q$fragment\E/i) {
                  $destArr->[$i] = "Y";
                  # stop scanning so a later description tag cannot
                  # reset the match
                  last META;
               }
            }
         }
      }
      # no file or no description tag
      if (!defined $destArr->[$i]) {
         $destArr->[$i] = "N";
      }
   }
   return;
}


# Subroutine:
#   keywordDensity
# Description:
#   This subroutine calculates keyword density for given keyword in the
#   body text of every file (quote characters are removed first) and
#   averages the per-file results returned by calculateKD.
# Inputs:
#   $numberOfElements => number of files to process
#   $keyword => keyword to analyze
#   $destArr => array (reference) to result array (filled by calculateKD)
#   $prefix => file prefix
# Outputs:
#   Prints a message for every file that is not found
# Returns:
#   Returns average keyword density (0 when there are no files)
sub keywordDensity {
   my ($numberOfElements, $keyword, $destArr, $prefix) = @_;
   my $total = 0;

   #loop through all files
   for (my $i = 0; $i < $numberOfElements; $i++) {
      my $pageCopy = "";
      my $filename = './serptemp/' . $prefix . "$i.txt";
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         while (my $token = $p->get_tag("body")) {
            $pageCopy = $p->get_trimmed_text("/body");
         }
      } else {
         print "\nFile not found when calculating keyword density.";
      }
      #compare copy and array (sep function)
      $pageCopy =~ tr/"'//d;

      $total += calculateKD($i, $pageCopy, $destArr, $keyword);
   }
   # avoid "Illegal division by zero" when nothing was processed
   return 0 unless $numberOfElements > 0;
   return ($total / $numberOfElements);
}


# Subroutine:
#   calculateKD
# Description:
#   Helper subroutine to calculate keyword density: occurrences of the
#   keyword divided by the number of words in the page copy, as a
#   percentage. For a multi-word keyword the densities of the individual
#   words are added up. Matching is case-insensitive.
# Inputs:
#   $index => index of the result within $destArr
#   $pageCopy => page copy text
#   $destArr => array (reference) to result array
#   $keyword => keyword to analyze
# Outputs:
#   No outputs produced
# Returns:
#   Returns the density that was stored in $destArr->[$index]
sub calculateKD {
   my ($index, $pageCopy, $destArr, $keyword) = @_;
   $pageCopy = '' unless defined $pageCopy;

   my @words = split(' ', $pageCopy);
   my $wordCount = scalar @words;
   my $density = 0;

   # an empty page has density 0 (and must not divide by zero)
   if ($wordCount > 0) {
      my @keywordFragments =
         grep { length } split(/ /, defined $keyword ? $keyword : '');
      for my $fragment (@keywordFragments) {
         # count the matches directly; \Q..\E matches the fragment
         # literally
         my $hits = () = $pageCopy =~ /\Q$fragment\E/gi;
         $density += ($hits / $wordCount) * 100;
      }
   }

   $destArr->[$index] = $density;
   return $density;
}


# Subroutine:
#   keywordDomainExactMatch
# Description:
#   This subroutine analyzes keywords in domain names. It looks
#   to see if keyword is part of the domain name; for a multi-word
#   keyword every word must appear in the domain name.
#   Possible improvement could also consider keyword stemming.
# Inputs:
#   $keyword => keyword to look for
#   $linksArr => array (reference) to links array
#   $numberOfElements => number of links to process
#   $destArr => array (reference) to result array ("Y" or "N" per link)
# Outputs:
#   No outputs produced
# Returns:
#   Returns the percentage of links with an exact match
sub keywordDomainExactMatch {
   my ($keyword, $linksArr, $numberOfElements, $destArr) = @_;
   # empty fragments (doubled spaces) are dropped: an empty pattern
   # would not behave as a plain "always matches" test in Perl
   my @keywordFragments =
      grep { length } split(/ /, defined $keyword ? $keyword : '');
   my $total = 0;

   # "<" (not "<="): $numberOfElements is a count, so the last valid
   # index is $numberOfElements - 1
   for (my $i = 0; $i < $numberOfElements; $i++) {
      # reduce the link to its domain: drop the scheme, then everything
      # from the first "/" on
      my $link = defined $linksArr->[$i] ? $linksArr->[$i] : '';
      $link =~ s{^https?://}{}i;
      $link =~ s{/.*$}{}s;

      my $matchCnt = grep { $link =~ /\Q$_\E/i } @keywordFragments;
      if (@keywordFragments && $matchCnt == @keywordFragments) {
         $destArr->[$i] = "Y";
         $total++;
      } else {
         $destArr->[$i] = "N";
      }
   }
   # avoid "Illegal division by zero" when nothing was processed
   return 0 unless $numberOfElements > 0;
   return ( ($total / $numberOfElements) * 100 );
}


# Subroutine:
#   keywordDomainPartialMatch
# Description:
#   This subroutine analyzes keywords in domain names. It looks
#   for a partial match between the keyword and the domain name: at least
#   one word of the keyword must appear in the domain name
#   (case-insensitively).
# Inputs:
#   $keyword => keyword to look for
#   $linksArr => array (reference) to links array
#   $numberOfElements => number of links to process
#   $destArr => array (reference) to result array ("Y" or "N" per link)
# Outputs:
#   No outputs produced
# Returns:
#   Returns the percentage of links with a partial match
sub keywordDomainPartialMatch {
   my ($keyword, $linksArr, $numberOfElements, $destArr) = @_;
   # empty fragments (doubled spaces) are dropped: an empty pattern
   # would not behave as a plain "always matches" test in Perl
   my @keywordFragments =
      grep { length } split(/ /, defined $keyword ? $keyword : '');
   my $matchCnt = 0;

   #loop through all links
   for (my $i = 0; $i < $numberOfElements; $i++) {
      # reduce the link to its domain: drop the scheme, then everything
      # from the first "/" on
      my $link = defined $linksArr->[$i] ? $linksArr->[$i] : '';
      $link =~ s{^https?://}{}i;
      $link =~ s{/.*$}{}s;

      # a single-word keyword is simply the only fragment, so one loop
      # covers both cases
      $destArr->[$i] = "N";
      for my $fragment (@keywordFragments) {
         if ($link =~ /\Q$fragment\E/i) {
            $destArr->[$i] = "Y";
            $matchCnt++;
            last;
         }
      }
   }
   # avoid "Illegal division by zero" when nothing was processed
   return 0 unless $numberOfElements > 0;
   return ( ($matchCnt / $numberOfElements) * 100 );
}


# Subroutine:
#   compareTitlePageCopy
# Description:
#   This subroutine compares page title to page copy: the title is split
#   into words and compareTitlePageCopyHelper records what percentage of
#   them appear in the body text of the saved page.
# Inputs:
#   $numberOfElements => number of files to process
#   $titlesArr => array (reference) to titles array
#   $destArr => array (reference) to result array
#   $prefix => file prefix for the three SEs
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on passed arrays
sub compareTitlePageCopy {
   my ($numberOfElements, $titlesArr, $destArr, $prefix) = @_;

   #loop through all files; "<" because $numberOfElements is a count
   for (my $i = 0; $i < $numberOfElements; $i++) {
      #split up current title into token words
      my $title = cleanText($titlesArr->[$i]);
      $title =~ s/'//g;
      $title =~ s/"//g;
      my @titleFragments = split(/ /, $title);

      #get copy of each file (text of the first body tag only)
      my $pageCopy = "";
      my $filename = './serptemp/' . $prefix . "$i.txt";
      if (-e "$filename") {
         my $p = HTML::TokeParser->new($filename);
         if (my $token = $p->get_tag("body")) {
            $pageCopy = $p->get_trimmed_text("/body");
            $pageCopy =~ s/'//g;
            $pageCopy =~ s/"//g;
         }
      }

      #compare copy and array (sep function); the helper expects an
      #array reference, so the fragments must not be passed as a list
      compareTitlePageCopyHelper($i, $#titleFragments,
         \@titleFragments, $pageCopy, $destArr);
   }
   return;
}


# Subroutine:
#   compareTitlePageCopyHelper
# Description:
#   This subroutine is used by compareTitlePageCopy subroutine
#   to compare page title to page copy. It stores the percentage of
#   title words that occur (case-insensitively, as literal text) in
#   the page copy.
# Inputs:
#   $index => index of the result slot to fill in $pageCopyTitleArr
#   $numberOfElements => LAST INDEX of the title fragments array
#                        (i.e. number of fragments minus one)
#   $titleFragments => array (reference) to title fragments array
#   $pageCopy => page copy text
#   $pageCopyTitleArr => array (reference) to resulting array
# Outputs:
#   No outputs produced
# Returns:
#   No returns all work done on passed arrays

sub compareTitlePageCopyHelper {
   my ($index, $numberOfElements, $titleFragments, $pageCopy, $pageCopyTitleArr) = @_;
   my $foundCnt = 0;
   my $wordCnt  = 0;
   $pageCopy = "" unless defined $pageCopy;

   for my $j (0 .. $numberOfElements) {
      my $tmpfragment = $titleFragments->[$j];

      # split(/ /, ...) yields empty fragments for leading/double spaces;
      # they are not words, and an empty pattern would not behave as a
      # plain "matches everything" test in Perl, so they are ignored
      next unless defined $tmpfragment && length $tmpfragment;
      $wordCnt++;

      # \Q..\E: the fragment is literal text, not a regular expression
      if( $pageCopy =~ /\Q$tmpfragment\E/i ){
         $foundCnt++;
      }
   }

   if($foundCnt == 0){
      $pageCopyTitleArr->[$index] = 0;
   } else {
      $pageCopyTitleArr->[$index] = ( ($foundCnt/$wordCnt) * 100);
   }
}


# Subroutine:
#   compareArrays
# Description:
#   This subroutine compares two arrays element by element: for every
#   index it checks whether the element of the second array occurs
#   (case-insensitively, as literal text) inside the element of the
#   first array, and records "Y" or "N" in the result array.
# Inputs:
#   $numOfElements => number of elements to compare
#   $realArr => array (reference) to first source array
#   $foundArr => array (reference) to second source array
#   $destArr => array (reference) to result array
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine returns percentage of found matches (0 when there is
#   nothing to compare)
sub compareArrays {
   my ($numOfElements, $realArr, $foundArr, $destArr) = @_;
   my $found = 0;

   # avoid a division by zero when no elements were collected
   return 0 unless $numOfElements && $numOfElements > 0;

   for my $i (0 .. $numOfElements - 1) {
      my $real   = defined $realArr->[$i]  ? $realArr->[$i]  : "";
      my $needle = defined $foundArr->[$i] ? $foundArr->[$i] : "";

      # plain substring test: titles are text, not regular expressions,
      # so characters such as ( ) + [ ] must not be interpreted
      if (index(lc $real, lc $needle) >= 0) {
         $destArr->[$i] = "Y";
         $found++;
      } else {
         $destArr->[$i] = "N";
      }
   }
   return ( ($found/$numOfElements)*100);
}


# Subroutine:
#   getRealTitles
# Description:
#   This subroutine retrieves actual titles from the downloaded pages
#   stored as ./serptemp/<prefix><index>.txt
# Inputs:
#   $numberOfElements => number of files to process
#   $titlesArr => array (reference) to array that will contain real titles
#   $prefix => prefix of file name to be used
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine operates on array already defined outside the routine.
#   Subroutine returns nothing.
sub getRealTitles {
   my ($numberOfElements, $titlesArr, $prefix) = @_;

   for my $i (0 .. $numberOfElements - 1) {
      my $filename = './serptemp/' . $prefix . "$i.txt";

      # default covers a missing file, an unreadable file and a page
      # that has no <title> element
      $titlesArr->[$i] = "not found";
      next unless -e $filename;

      my $p = HTML::TokeParser->new($filename) or next;
      while (my $token = $p->get_token) {
         if ($token->[0] eq "S" and lc $token->[1] eq 'title') {
            my $title =  $p->get_text() || "not found";
            #remove leading & trailing whitespace
            $title =~ s/^\s+//;
            $title =~ s/\s+$//;
            $titlesArr->[$i]=$title;
            last;
         }
      }
   }
   return;
}


# Subroutine:
#   getKeywordsTitleMatch
# Description:
#   This subroutine compares given keyword with entries of array
#   while setting third array with results of this comparison.
#   An entry is marked "Y" when at least one word of the keyword
#   phrase occurs in it (case-insensitive, literal match).
# Inputs:
#   $keyword => keyword or keyphrase to do analysis on
#   $sourceArr => array (reference) to be used for comparisons
#   $numOfElements => upper bound of the index loop; entries 0 through
#                     $numOfElements inclusive are processed
#   $destArr => array (reference) that will contain comparison results
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine operates on array already defined outside the routine.
#   Subroutine returns nothing.
sub getKeywordsTitleMatch {
   my ($keyword, $sourceArr, $numOfElements, $destArr) = @_;

   $keyword = cleanText($keyword);
   $keyword = "" unless defined $keyword;
   $keyword =~ s/['"]//g;
   # empty fragments (from leading or repeated spaces) are not words
   my @keywordFragments = grep { length } split(/ /, $keyword);

   for(my $i=0; $i<= $numOfElements; $i++) {
      my $tmp = cleanText($sourceArr->[$i]);
      $tmp = "" unless defined $tmp;
      # strip ALL quotes, the same way the keyword is treated
      $tmp =~ s/['"]//g;

      my $matched = 0;
      foreach my $fragment (@keywordFragments) {
         # \Q..\E: the word is literal text, not a regular expression
         if ($tmp =~ /\Q$fragment\E/i) {
            $matched = 1;
            last;
         }
      }
      $destArr->[$i] = $matched ? "Y" : "N";
   }
   return;
}


# Subroutine:
#   initializeKeyVariables
# Description:
#   Main purpose is to setup link and title arrays that are
#   to be used throughout the script. The subroutine downloads one
#   result page from each search engine and collects the organic
#   result links together with their anchor texts.
# Inputs:
#   $keyword => keyword or keyphrase to do analysis on
#   $googleLinksArr => array (reference) containing Google links
#   $googleTitlesArr => array (reference) containing Google titles
#   $yahooLinksArr => array (reference) containing Yahoo! links
#   $yahooTitlesArr => array (reference) containing Yahoo! titles
#   $bingLinksArr => array (reference) containing Bing links
#   $bingTitlesArr => array (reference) containing Bing titles
# Outputs:
#   No outputs produced
# Returns:
#   Subroutine operates on arrays already defined outside the routine.
#   Subroutine returns nothing.
sub initializeKeyVariables {
   my ($keyword,     $googleLinksArr,$googleTitlesArr,
       $yahooLinksArr, $yahooTitlesArr,   $bingLinksArr,
       $bingTitlesArr) = @_;
   #create user agents
   my $uaGoogle = LWP::UserAgent->new;
   my $uaYahoo = LWP::UserAgent->new;
   my $uaBing = LWP::UserAgent->new;

   #setup time out to 25 seconds and a browser-like user agent
   my $useragent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)";
   foreach my $ua ($uaGoogle, $uaYahoo, $uaBing) {
      $ua->timeout(25);
      $ua->agent($useragent);
   }

   #setup & get $numres results for each SE
   # NOTE(review): $numres is not set in this subroutine; presumably a
   # file-level setting - confirm.
   # NOTE(review): $keyword is placed in the query string as-is;
   # confirm that callers pass it URL-encoded.
   my $gurl=
"http://www.google.com/search?num=$numres&hl=en&safe=off&q=$keyword&sa=N";
   my $yurl=
"http://search.yahoo.com/search?p=$keyword&ei=UTF-8&fr=sfp&n=$numres&b=1";
   my $lurl=
"http://search.bing.com/results.aspx?q=$keyword&first=1&count=$numres&";

   #assign SERPs to special variables
   my $ghtml = $uaGoogle->get($gurl)->content;
   my $yhtml = $uaYahoo->get($yurl)->content;
   my $lhtml = $uaBing->get($lurl)->content;

   #get links for each serp; a REFERENCE to the document is required,
   #a plain string would be taken as the name of a file to open
   my $streamGoogle = HTML::TokeParser->new(\$ghtml);
   my $streamYahoo  = HTML::TokeParser->new(\$yhtml);
   my $streamBing   = HTML::TokeParser->new(\$lhtml);

   #truncated titles end in a literal "..." which is removed
   my $threeDots = qr/\.\.\.$/;

   # process google links
   my $cnt=0;
   while (my $token = $streamGoogle->get_token) {
      next unless $token->[0] eq 'S' && $token->[1] eq 'a';
      my $href = $token->[2]{'href'};
      next unless defined $href;

      #keep plain http links that are not caches, ads or well-known sites
      if( ($href =~ /^http/i) &&
          ($href !~ /^https/i) &&
          ($href !~ /cache/i) &&
          ($href !~ /google/i) &&
          ($href !~ /aclk/i) &&
          ($href !~ /youtube/i) &&
          ($href !~ /wikipedia/i) ) {
         $googleLinksArr->[$cnt] = $href;
         $googleTitlesArr->[$cnt] = $streamGoogle->get_trimmed_text("/a");
         $googleTitlesArr->[$cnt] =~ s/$threeDots//;
         $cnt++;
      }
   }

   # process yahoo links
   my $cnt2=0;
   while (my $token = $streamYahoo->get_token) {
      next unless $token->[0] eq 'S' && $token->[1] eq 'a';
      my $href = $token->[2]{'href'};
      next unless defined $href;

      #the destination URL follows a literal "**" inside the href
      my @tmpurl = split (/\*\*/, $href);
      my $target = $tmpurl[1];
      next unless defined $target;
      $target =~ s/%3f/?/g;
      $target =~ s/%26/&/g;

      if( ($target !~ /cache/i) &&
          ($target !~ /^https/i) &&
          ($target !~ /yahoo/i) &&
          ($target !~ /wikipedia/i) &&
          ($target !~ /overture/i) ){
         $target =~ s/%3a/:/g;
         $target =~ s/^\s+//;
         if( $target ne "") {
            $yahooLinksArr->[$cnt2] = $target;
            $yahooTitlesArr->[$cnt2] = $streamYahoo->get_trimmed_text("/a");
            $yahooTitlesArr->[$cnt2] =~ s/$threeDots//;
            $cnt2++;
         }
      }
   }

   # process bing links
   my $cnt3=0;
   while (my $token = $streamBing->get_token) {
      next unless $token->[0] eq 'S' && $token->[1] eq 'a';
      my $href = $token->[2]{'href'};
      next unless defined $href;

      if( ($href =~ /^http/i) &&
          ($href !~ /^https/i) &&
          ($href !~ /cache/i) &&
          ($href !~ /wikipedia/i) &&
          ($href !~ /msn/i) &&
          ($href !~ /hotmail/i) &&
          ($href !~ /microsoft/i) &&
          ($href !~ /bing\.com/i) ) {
         $href =~ s/^\s+//;
         if($href ne "")  {
            $bingLinksArr->[$cnt3] = $href;
            $bingTitlesArr->[$cnt3] = $streamBing->get_trimmed_text("/a");
            $bingTitlesArr->[$cnt3] =~ s/$threeDots//;
            $cnt3++;
         }
      }
   }
   return;
}


# Subroutine:
#   getSERPResults
# Description:
#   This subroutine downloads htmls of all urls specified
#   in the array referenced by $urlArr
# Inputs:
#   $numberOfElements => size of referred array
#   $urlArr => array (reference) containing urls to process
#   $name => prefix of file name to be used
# Outputs:
#   text files (./serptemp/<name><index>.txt) containing the html
#   of the downloaded links
# Returns:
#   Subroutine operates on array already defined outside the routine.
#   Subroutine returns nothing.
sub getSERPResults {
   my ($numberOfElements, $urlArr, $name) = @_;
   my $ua = LWP::UserAgent->new;

   $ua->timeout(25);
   $ua->agent("My Crawler");

   # lexical loop variables: the index must not leak into (or clobber)
   # a caller's own $i or $filename
   for my $i (0 .. $numberOfElements - 1) {
      my $url = $urlArr->[$i];
      next unless defined $url && length $url;
      my $filename = "./serptemp/". $name . $i . ".txt";
      $ua->get($url, ':content_file' => $filename);
   }
   return;
}


# Subroutine:
#   cleanText
# Description:
#   This is a utility subroutine to clean HTML fragments: punctuation
#   characters are replaced by spaces and runs of whitespace are
#   collapsed into a single space.
# Inputs:
#   $text => content of text to clean
# Outputs:
#   No outputs produced
# Returns:
#   The cleaned copy of $text (empty string when $text is undefined)
sub cleanText {
   my $text = shift;
   return "" unless defined $text;

   # each of these characters becomes one space:
   #   ( ) [ ] $ . = | ! , ? ^ : ; & * -
   $text =~ s/[()\[\]\$.=|!,?^:;&*-]/ /g;
   # collapse the whitespace runs created above or present in the input
   $text =~ s/\s+/ /g;
   return $text;
}

Chapter 5

linkchecker.pl

#!/usr/local/bin/perl
#####################################################################
# File: linkchecker.pl                                              #
# Description: Check Links Script                                   #
# Usage: perl linkchecker.pl http://somedomain.net > report.csv     #
#####################################################################
use WWW::Mechanize;
use LWP::Simple;
# Main part of linkchecker.pl: collects the links of the start page
# (level one), then follows internal links three more levels deep and
# finally prints a tab-separated report to standard output.
my $baseurl = shift;
die "Usage: perl linkchecker.pl http://somedomain.net > report.csv\n"
    unless defined $baseurl;

# parallel arrays: element N of each array describes link number N
my @url=();
my @level=();
my @type=();
my @title=();
my @status=();
my @page=();
my %uniqueURL=();
my %checkedURL=();
my $masterCnt=0;
my $masterLevel=1;
my $mech  = WWW::Mechanize->new();

#### Processing Level One
$mech->get( $baseurl );

foreach my $link ($mech->links()) {
  my $href = $link->url();
  next if $href =~ /mailto/i || $href =~ /javascript/i;

  if ($href !~ /^http/) {
    #relative link: collect unique URL built from the base URL
    my $tmpurl = $baseurl . '/' . $href;
    $uniqueURL{$tmpurl}=$link->text();
    $url[$masterCnt]=$tmpurl;
    $type[$masterCnt]= "relative";
  }else {
    $uniqueURL{$href}=$link->text();
    $url[$masterCnt]=$href;
    # \Q..\E: the base URL is literal text, not a regular expression
    if( $href =~ /\Q$baseurl\E/ ){
      $type[$masterCnt]= "absolute internal";
    }else {
      $type[$masterCnt]= "outbound";
    }
  }
  $level[$masterCnt]=$masterLevel;
  $title[$masterCnt]=$link->text();
  $page[$masterCnt]=$baseurl;
  $masterCnt++;
}
$masterLevel++;
my $linksOnFirstLevel=$masterCnt;

# processSubLevel and printReport unpack array REFERENCES, so every
# array is passed with a leading backslash

####Processing Level Two
my %levTwoURLs = ();
$masterCnt = processSubLevel(2, $masterCnt, \@url, \@level, \@type,
                       \@title, \@status, \@page, \%uniqueURL,
                      $baseurl,  $masterLevel, \%levTwoURLs);
$masterLevel++;
my $linksOnSecondLevel = keys(%levTwoURLs);
####Processing Level Three
my %levThreeURLs = ();
$masterCnt = processSubLevel(3, $masterCnt, \@url, \@level,
                           \@type, \@title, \@status, \@page,
                    \%levTwoURLs, $baseurl, $masterLevel,
                    \%levThreeURLs);
$masterLevel++;
my $linksOnThirdLevel = keys(%levThreeURLs);
####Processing Level Four
my %levFourURLs = ();
$masterCnt = processSubLevel(4, $masterCnt, \@url, \@level, \@type,
                       \@title, \@status, \@page, \%levThreeURLs,
                      $baseurl, $masterLevel, \%levFourURLs);
my $linksOnFourthLevel = keys(%levFourURLs);
printReport(\@level,\@page,\@url,\@type,\@title,\@status, $masterCnt);
#### subroutines
# Crawls one level of the site: every internal page found on the
# previous level is fetched and its links are appended to the report
# arrays.
# Inputs:
#   $currentLevel => level number (not used by this subroutine)
#   $mstCnt => number of links recorded so far (next free array index)
#   $urlArr, $leArr, $tyArr, $tiArr, $paArr => array references for
#       URL, level, link type, title and page of every recorded link
#   $stArr => array reference for status codes (not used here)
#   $urls => hash reference with the URLs found on the previous level
#   $burl => base URL of the site being checked
#   $mlevel => level number stored for the links found here
#   $uniqueHashRef => hash reference receiving the new unique URLs
# Returns:
#   The updated number of recorded links.
# Relies on the file-level $mech (WWW::Mechanize) object.
sub processSubLevel {
  my ($currentLevel, $mstCnt, $urlArr, $leArr, $tyArr, $tiArr,
             $stArr,  $paArr, $urls, $burl, $mlevel,
             $uniqueHashRef) = @_;

  # URLs recorded before this call are not reported again
  my %urlHash = map { $_ => 1 } @$urlArr;

  # file types that are not HTML pages and therefore are not crawled
  my $skipExt = qr/\.(?:gif|jpg|png|pdf|doc|xls|asf|mov|avi|xvid|flv|mpg|3gp|mp4|qt|rm|swf|wmv|txt|js|css)$/;

  # keys() returns a snapshot, so adding to %$urls below is safe
  foreach my $lURL (keys %$urls) {
    next if $lURL =~ $skipExt;
    next unless $lURL =~ /\Q$burl\E/;     # only internal pages are crawled
    next if $lURL =~ /mailto/i || $lURL =~ /javascript/i;

    # a dead link must not abort the whole crawl
    next unless eval { $mech->get( $lURL ); 1 };

    foreach my $link ($mech->links()) {
      my $href = $link->url();
      next if $href =~ /mailto/i || $href =~ /javascript/i;

      # relative links are resolved against the base URL
      my $isRelative = ($href !~ /^http/i);
      my $tmpurl = $isRelative ? $burl . '/' . $href : $href;
      next if exists $urlHash{$tmpurl};

      #check UNIQUENESS
      if( !(exists $urls->{$tmpurl}) ) {
        $urls->{$tmpurl}=$link->text();
        $uniqueHashRef->{ $tmpurl } = $link->text();
      }

      $urlArr->[$mstCnt] = $tmpurl;
      if ($isRelative) {
        $tyArr->[$mstCnt]= "relative internal";
      } elsif ( $href =~ /\Q$burl\E/ ) {
        $tyArr->[$mstCnt]= "absolute internal";
      } else {
        $tyArr->[$mstCnt]= "outbound";
      }
      $leArr->[$mstCnt]=$mlevel;
      $tiArr->[$mstCnt]=$link->text();
      # NOTE(review): this stores the link itself; level one stores the
      # page the link was found on - confirm which one is intended.
      $paArr->[$mstCnt]=$tmpurl;
      $mstCnt++;
    }
  }
  return ($mstCnt);
}
# Prints the tab-separated link report to standard output: one header
# row followed by one row per unique URL. The last column is the HTTP
# status code returned by LWP::Simple's getstore() for the URL.
# Inputs:
#   $levelArr, $pageArr, $urlArr, $typeArr, $titleArr => array
#       references holding the columns of the report
#   $statusArr => array reference (not used; the status is fetched here)
#   $mCnt => number of recorded links
sub printReport {
  my ($levelArr, $pageArr, $urlArr, $typeArr, $titleArr,
     $statusArr, $mCnt) = @_;
  my %tmpCleanupHash=();

  print "Level\tParent Page or Location\tUnique URL\tLink Type\tTitle\tStatus Codes";
  for my $i (0 .. $mCnt - 1) {
    my $reportUrl = defined $urlArr->[$i] ? $urlArr->[$i] : "";

    # every URL is reported (and fetched) only once
    next if exists $tmpCleanupHash{$reportUrl};
    $tmpCleanupHash{$reportUrl} = 1;

    next unless defined $levelArr->[$i] && $levelArr->[$i] ne "";
    print "\n$levelArr->[$i]\t$pageArr->[$i]\t$urlArr->[$i]\t$typeArr->[$i]\t$titleArr->[$i]\t"
          . getstore($urlArr->[$i], "temp");
  }
  return;
}

mymonitor.pl

################################################
# File: mymonitor.pl                           #
# Description: This script takes an argument   #
#              representing a web page url     #
# Format: perl mymonitor.pl http://www.xyz.com #
################################################
use threads;
use Benchmark;
use Time::HiRes qw(gettimeofday tv_interval);
use LWP::Simple;
use LWP::UserAgent;
use File::Path;
# Main part of mymonitor.pl: downloads the page once, measures how long
# the download took and appends one record to each report data file.

#get page to monitor
my $pageToMonitor = shift;
die "Usage: perl mymonitor.pl http://www.xyz.com\n"
    unless defined $pageToMonitor;
my $ua = LWP::UserAgent->new;

#cleanup temp files
rmtree( './temp', {keep_root => 1} );

# start timer
my $start_time = [ gettimeofday ];
my $res = $ua->get($pageToMonitor, ':content_file' => "./temp/temp.dat");
# stop timer
my $end_time = [ gettimeofday ];
my $elapsedtime = tv_interval($start_time,$end_time);
warn "Could not fetch $pageToMonitor: " . $res->status_line . "\n"
    unless $res->is_success;

##### CREATING DATA FILES #####################################
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
    = localtime time;
$year += 1900;
$mon++;

# one record per run: timestamp;10;15;20;elapsed seconds
my $record = "$mday-$mon-$year $hour:$min:$sec;10;15;20;$elapsedtime\n";

# the same record is appended to today.txt, month.txt, year.txt
# and historical.txt
foreach my $period (qw(today month year historical)) {
   my $datafile = "./report/$period/$period.txt";
   my $out;
   unless (open($out, '>>', $datafile)) {
      warn "Could not open $datafile: $!\n";
      next;
   }
   print {$out} $record;
   close($out) or warn "Could not close $datafile: $!\n";
}

inlinksAnalysis.pl

#!/usr/local/bin/perl
###########################################################
# File: inlinksAnalysis.pl                                #
# Description: This script performs analysis on Yahoo!    #
#              inbound links TSD file                     #
###########################################################
use LWP::Simple;
use LWP::UserAgent;
use HTML::TokeParser;
# Main part of inlinksAnalysis.pl: reads the linking pages from a
# tab-separated file, downloads each page and counts the links that
# point to the given base URL. The result is written to report.txt.
my @URLs = ();
#get the input param name of the file
my $fileToProcess = defined $ARGV[0] ? $ARGV[0] : "";
my $baseurl       = defined $ARGV[1] ? $ARGV[1] : "";
print "\nProcessing: $fileToProcess";

# open the file
if (-e $fileToProcess){
   open(my $in, '<', $fileToProcess) or die $!;
   while (my $line = <$in>) {
      chomp $line;
      my @fragments = split(/\t/, $line);
      # the URL of the linking page is the second column
      push @URLs, $fragments[1]
         if defined $fragments[1] && length $fragments[1];
   }
   close($in);
} else {
    print "\nfile ($fileToProcess) does not exist";
}

my $ua = LWP::UserAgent->new;
$ua->agent("My Crawler");
my %linkPopHash = ();
my %anchorPopHash = ();

foreach my $pageUrl (@URLs) {
   my $res = $ua->get($pageUrl, ':content_file' => "temp.txt");
   # after a failed download temp.txt still holds the PREVIOUS page;
   # parsing it again would count that page's links twice
   next unless $res->is_success && -e "temp.txt";

   my $p = HTML::TokeParser->new("temp.txt") or next;
   while (my $token = $p->get_tag("a")) {
      #get link and anchor text
      my $url = $token->[1]{href} || "-";
      my $anchorText = $p->get_trimmed_text("/a");
      $url =~ s/^\s+//;
      $url =~ s/\s+$//;

      # literal, case-insensitive containment test
      next unless index(lc $url, lc $baseurl) >= 0;
      $linkPopHash{$url}++;
      $anchorPopHash{$url} = $anchorText;   # last anchor text seen wins
   }
}

# most popular link first
open(my $report, '>', 'report.txt')
   or die "Could not write report.txt: $!";
foreach my $key ( sort { $linkPopHash{$b} <=> $linkPopHash{$a} }
                  keys %linkPopHash ) {
   print {$report} "$key, $linkPopHash{$key}, \"$anchorPopHash{$key}\"\n";
}
close($report);

Chapter 6

searchPhraseReportGoogle.pl

#!/usr/bin/perl
#----------------------------------#
#  PROGRAM:  Search Phrase Report  #
#----------------------------------#

# Main part of searchPhraseReportGoogle.pl: scans the web server log
# files given on the command line for requests referred by a Google
# search, counts every (requested resource, search phrase) pair and
# writes the summary to keywordsummary.html.

# key: the first two table cells of a report row, value: hit count
# (package variable: it is read by the hashValueDescendingNum sorter)
our %googleDirCnt = ();

foreach my $logfile (@ARGV) {
   print "Processing $logfile file\n\n";
   open(my $log, '<', $logfile)
      or die("Could not open log file: $logfile.");
   while (my $line = <$log>) {
      #do Google analysis
      next unless ($line =~ /q=/) && ($line =~ /google/);

      my @tmp1 = split ('GET ',$line);
      next unless defined $tmp1[1];
      my @tmp2 = split (' ', $tmp1[1]);       # $tmp2[0] = requested resource
      my @tmp3 = split ('q=', $tmp1[1]);
      next unless defined $tmp3[1];
      my @tmp4 = split ('&', $tmp3[1]);

      # cut at the quote that ends the log field BEFORE decoding, so a
      # decoded %22 inside the phrase does not truncate it
      my ($phrase) = split ('"', $tmp4[0]);
      $phrase = "" unless defined $phrase;

      #do some cleanup
      $phrase =~ s/\+/ /g;
      $phrase =~ s/\%20/ /g;
      $phrase =~ s/\%3C/</gi;
      $phrase =~ s/\%3E/>/gi;
      $phrase =~ s/\%23/#/g;
      $phrase =~ s/\%22/"/g;
      $phrase =~ s/\%25/\%/g;
      $phrase =~ s/\%3A/:/gi;
      $phrase =~ s/\%2F/\//gi;
      $phrase =~ s/\%2B/+/gi;

      # log data is untrusted: escape it before it goes into the HTML
      my $resource = defined $tmp2[0] ? $tmp2[0] : "";
      for my $cell ($resource, $phrase) {
         $cell =~ s/&/&amp;/g;
         $cell =~ s/</&lt;/g;
         $cell =~ s/>/&gt;/g;
         $cell =~ s/"/&quot;/g;
      }

      my $tmpKey = "<tr><td>".$resource." </td><td>".$phrase."</td>";
      $googleDirCnt{$tmpKey}++;
   }
   close($log);
}

open(my $summary, '>', 'keywordsummary.html')
   or die("Could not write keywordsummary.html: $!");
print {$summary} "<html><head><title>Keyword Summary</title></head>";
print {$summary} "<body><strong>Google Summary</strong>";
print {$summary} "<table width=400><tr><td><b>Resource/URL</b></td><td><b>Keyword</b></td>";
print {$summary} "<td><b>Count</b></td></tr>";
foreach my $key (sort hashValueDescendingNum (keys(%googleDirCnt))) {
       print {$summary} $key."<td>".$googleDirCnt{$key}."</td></tr>";
}
print {$summary} "</table></body></html>";
close($summary);


# Sort comparator: orders the keys of %googleDirCnt by their count,
# largest count first. Uses sort's $a and $b.
sub hashValueDescendingNum {
   my ($countA, $countB) = ($googleDirCnt{$a}, $googleDirCnt{$b});
   return $countB <=> $countA;
}

Chapter 13

getRankings.pl

#!/usr/local/bin/perl

###########################################
# File: getRankings.pl                    #
# Description: This script queries SEs    #
#              to produce rankings report #
###########################################

### Basic setup part
# Main part of getRankings.pl: builds the search URLs for the keyword
# phrase, downloads one result page per search engine with wget and
# prints the position(s) of the target URL on each page.
my $numOfArgs = scalar @ARGV;

if ($numOfArgs < 2) {
   print ("\n\nUsage: perl getRankings.pl [TargetURL] [Keyword]\n");
   print ("\nOR\n");
   print ("\nUsage: perl getRankings.pl [TargetURL] [Keyword1] [Keyword2] ... [KeywordN]\n\n");
   exit(0);
}

my $targeturl=$ARGV[0];

# all remaining arguments form the keyword phrase
my $originalkeywordphrase = join(" ", @ARGV[1 .. $#ARGV]);
#remove leading & trailing spaces
$originalkeywordphrase =~ s/^\s+//;
$originalkeywordphrase =~ s/\s+$//;

# percent-encode everything except letters and digits
my $keywordphrase= $originalkeywordphrase;
$keywordphrase =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;

# define Source Urls
my $listingNo=100;

my $gurl=
"http://www.google.com/search?num=$listingNo&hl=en&safe=off&q=$keywordphrase&sa=N";
my $burl= "http://www.bing.com/search?q=$keywordphrase&first=1&count=100&";

### get SERP pages part
# wget is started without a shell (list form of system), so the URL
# needs no quoting and the command cannot be split across lines
my $useragent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)";
foreach my $job ( [$gurl, "gserp.html"], [$burl, "bserp.html"] ) {
   my ($serpUrl, $outfile) = @$job;
   system("wget", $serpUrl,
          "--user-agent=$useragent",
          "--output-document=$outfile",
          "--cookies=off") == 0
      or warn "wget failed for $serpUrl (exit status $?)\n";
}

### analysis part
my $googlePositionNumber = getPosition ($targeturl, "google");
my $bingSearchPositionNumber = getPosition ($targeturl, "bing");

# report part
##########################
print "\nRanking Summary Report\n";
print   "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print   "Keyword/Phrase: $originalkeywordphrase\n";
print   "Target URL: $targeturl\n";
print   "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print   " Google.....: $googlePositionNumber\n";

if($bingSearchPositionNumber ne "not found"){
   my $cntAdjusted = $bingSearchPositionNumber + 1;
   print   " Bing Search: $cntAdjusted\n";
}else{
   print   " Bing Search: $bingSearchPositionNumber\n";
}
print "\nNote: Check with specific SE to ensure correctness.\n";

##### SUBROUTINES ####################################
# Reads a saved result page and prepares it for position matching.
# Inputs:
#   file name of the saved page
# Returns:
#   The content of the file as one line: newlines are replaced by
#   spaces, double quotes are removed and <strong>/<cite> tags
#   (opening and closing) are stripped. An empty string is returned
#   when the file cannot be read.
sub getContent {
   my $filename = shift;

   my $input;
   unless (open($input, '<', $filename)) {
      warn "Could not open $filename: $!\n";
      return "";
   }
   # slurp the whole file; "local" restores the record separator
   my $content = do { local $/; <$input> };
   close($input);
   $content = "" unless defined $content;

   #substitute new line character with space character
   $content =~ s/\n/ /g;
   #substitute quotes with nothing
   $content =~ s/"//g;

   #cleanup bing
   $content =~ s/<\/?(?:strong|cite)>//g;

   return $content;
}

# Finds the position(s) of the target URL on a saved result page.
# Inputs:
#   target URL to look for (matched literally, case-insensitive)
#   search engine name: "google" or "bing"
# Outputs:
#   prints the index of the last token of the split page
# Returns:
#   A comma separated list of the token numbers (minus the engine
#   specific offset) that contain the target URL, or "not found".
sub getPosition {
   my $targeturl = shift;
   my $se = shift;
   my @tokens = ();
   my $offset = 0;

   # every search result starts with an engine specific marker
   if($se eq "google") {
      @tokens = split(/h3 class=r/, getContent("gserp.html"));
   } elsif($se eq "bing") {
      @tokens = split(/sa_cc/, getContent("bserp.html"));
      $offset=2;
   }

   print "number of tokens:". $#tokens;

   my @positions = ();
   my $cnt = 0;
   foreach my $token (@tokens) {
      # the same offset is applied to every match, not only the first
      if ($token =~ /\Q$targeturl\E/i) {
         push @positions, $cnt - $offset;
      }
      $cnt++;
   }

   return "not found" unless @positions;
   return join(",", @positions);
}

Chapter 15

sql.txt

-- Queue of future tweets used by the PHP scripts in this chapter.
-- status values used by those scripts:
--   0 = queued (default; listed by index.php, picked up by sendTweet.php)
--   1 = set by sendTweet.php after the tweet was delivered
--   2 = set by delete.php when the tweet is removed from the list
CREATE TABLE `mytest`.`queue` (
`id` INT( 6 ) NOT NULL AUTO_INCREMENT PRIMARY KEY ,
`message` TEXT NOT NULL ,
`status` INT( 1 ) NOT NULL DEFAULT '0'
) ENGINE = MYISAM ;

config.php

<?php
# Shared settings, included by index.php, add.php, delete.php and
# sendTweet.php. The full "<?php" tag is used so the file does not
# depend on the short_open_tag setting.
# change all lines but the last line (Twitter status update link)
#database
$username="your-db-username";
$password="your-db-password";
$database="your-database-name";

#twitter
$tusrid = 'your-twitter-userid';
$tpasswd = 'your-twitter-password';
$tURL = 'http://twitter.com/statuses/update.xml';

?>

index.php

<html>

<head>

<title>Home</title>

<script>
// Truncates the field's value so that it never exceeds limitNum characters.
function limitText(limitField, limitNum) {
    if (limitField.value.length > limitNum) {
        limitField.value = limitField.value.substring(0, limitNum);
    }
}

// Submit handler of the form below: enforces the 140 character limit
// one last time and lets the submission proceed.
function checkLength(form) {
    limitText(form.message, 140);
    return true;
}
</script>


</head>

<body>

<h3> What will you be doing? <br>(or what do you want others to think you are doing)
</h3><br>
<form name=mainform method=post action=add.php onSubmit="return checkLength(this)">

<textarea name="message" rows="3" cols="80" onKeyDown="limitText(this,140);"
onKeyUp="limitText(this,140);">
</textarea> <br>
<input type=submit value='Add Future Tweet'>

</form>
<br>


<?php
// list the tweets that are still waiting to be sent, newest first
include("config.php");
mysql_connect('localhost',$username,$password);
@mysql_select_db($database) or die( "Unable to select my database");
$query="SELECT * FROM queue where status=0 order by id desc";
$result=mysql_query($query);

// a failed query yields no rows instead of a warning
$numOfRecords = $result ? mysql_num_rows($result) : 0;

echo "<b>My Future Tweets</b><br><hr>";

?>

<table border="1" cellspacing="2" cellpadding="2">
<tr>
<td><b>id</b></td>
<td><b>Tweet</b></td>
<td><b>Status</b></td>
</tr>

<?php
$i=0;
while ($i < $numOfRecords) {
   $id=mysql_result($result,$i,"id");
   $message=mysql_result($result,$i,"message");
   $status=mysql_result($result,$i,"status");
?>

<tr>
<td nowrap><?php echo (int) $id; ?></td>
<td width=350><?php echo htmlspecialchars($message); /* user text: escape for HTML */ ?>

   <?php

   $tmp = "";
   if ($status < 1) {
     $tmp = "Not Sent";
   }

   ?>
&nbsp; <a href="delete.php?id=<?php echo (int) $id ?>">Delete</a>

</td><td nowrap><?php echo "$tmp"; ?></td>
</tr>
   <?php
   $i=$i+1;
}

mysql_close();
?>
</table>
</body>
</html>

add.php

<?php
// Adds the submitted message to the tweet queue and returns to index.php.
include("config.php");

mysql_connect('localhost',$username,$password);
@mysql_select_db($database) or die( "Unable to select my database");

$message = isset($_POST['message']) ? $_POST['message'] : '';

// undo magic quotes where that legacy setting exists, so the text is
// escaped exactly once below
if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
    $message = stripslashes($message);
}

// the message is user input: escape it before it becomes part of the SQL
$query = "INSERT INTO queue (message) VALUES ('"
       . mysql_real_escape_string($message) . "')";
mysql_query($query);

mysql_close();

?>

<script>
alert('Tweet Added');
window.location.href = "index.php";
</script>

delete.php

<?php
// Marks a queued tweet as removed (status 2) and returns to index.php.
include("config.php");

mysql_connect('localhost',$username,$password);
@mysql_select_db($database) or die( "Unable to select my database");

// the id comes from the query string: force it to an integer so it
// cannot inject SQL
$myid = isset($_GET['id']) ? (int) $_GET['id'] : 0;

$query="UPDATE queue SET status=2 WHERE id=$myid";

mysql_query($query);

mysql_close();

?>

<script>
alert('Tweet Removed');
window.location.href = "index.php";
</script>

sendTweet.php

<?php
// Sends the oldest queued tweet (status 0) and marks it as sent
// (status 1) when the update request returned a response.

include("config.php");

mysql_connect('localhost',$username,$password);
@mysql_select_db($database) or die( "Unable to select my database");

### get the tweet
$result =
mysql_query("select id, message from queue where status=0 order by id asc LIMIT 1");

$row = $result ? mysql_fetch_array($result) : false;

if (!$row) {
    // nothing is queued: do not post an empty status update
    echo 'no tweet queued';
    mysql_close();
    exit;
}

### send the tweet
$curl_handle = curl_init();
curl_setopt($curl_handle, CURLOPT_URL, "$tURL");
curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2);
curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl_handle, CURLOPT_POST, 1);

$message = $row['message'];

// a POST body given as a string must be URL-encoded, otherwise
// characters such as & or + would corrupt the status text
curl_setopt($curl_handle, CURLOPT_POSTFIELDS, "status=" . urlencode($message));
curl_setopt($curl_handle, CURLOPT_USERPWD, "$tusrid:$tpasswd");

$response = curl_exec($curl_handle);

curl_close($curl_handle);

// get the status message
if (empty($response)) {
    echo 'tweet not delivered';
} else {
    echo 'tweet delivered';
    ###update db status
    $mid = (int) $row['id'];
    mysql_query("UPDATE queue SET status = 1 WHERE id = $mid");
}

mysql_close();
?>

Crontab

# Tweet 5 times a day at 8am, 10am, 12pm, 2pm and 4pm
# (minute field 0: run once on the hour, not every minute of the hour)
0 8,10,12,14,16 * * * php sendTweet.php

Chapter 18

The following code listings represent only the main listings. For the full source code please visit book.seowarrior.com.

index.html

<html>
<head>

<title>SEO Warrior: Keyword Dashboard (Alpha)</title>
<link rel="stylesheet" type="text/css" href="pagestyle.css" />
<script src="functions.js" type="text/javascript"></script>
<script src="dockablewindow.js" type="text/javascript"></script>
</head>

<body>
<!-- page header: title on the left, status image and links on the right -->
<table width=100% cellpadding=0 cellspacing=0 border=0>
<tr>
<td valign=top align=left><h1 style='color:blue'>SEO Warrior: Keyword Dashboard (Alpha) </h1>
</td>
<td valign=top align=right>
<img border=0 src="http://www.seowarrior.net/images/status.png"
title="SEO Warrior: Keyword Dashboard Status">

<a href="http://www.seowarrior.net/contact/" title="Report Bugs">
<font size=2>Report Bugs</font></a> |
<a href="http://www.seowarrior.net/contact/" title="Make a Suggestion">
<font size=2>Suggestion</font></a>
 <a href="http://www.seowarrior.net"><img border=0
src="http://www.seowarrior.net/images/seowarriormini.png" title="SEO Warrior: Keyword Dashboard"></a>
</td>
</tr>
</table>

<!-- keyword form: each button calls stepOne() for one search engine -->
<div id="formdiv">
  <form name="mainform" onSubmit="return false;">
  Keyword: <input type="text" id="keyword" name="keyword" size="20">
  <input type="button" id="phaseGoogleBtn" name="phaseGoogleBtn" value="Google"
onclick="stepOne('google')">
  <input type="button" id="phaseBingBtn" name="phaseBingBtn" value="Bing"
onclick="stepOne('bing')">
  <input type="button" id="phaseYahooBtn" name="phaseYahooBtn" value="Yahoo!"
onclick="stepOne('yahoo')">

    <input type="radio" name="resultLimit" value="10" checked >10
    <input type="radio" name="resultLimit" value="20">20
    <input type="radio" name="resultLimit" value="50">50
    [Results]
  </form>
</div>

<!-- one result frame per search engine -->
<iframe onLoad="resizeG()" name="responsedivgoogle" id="responsedivgoogle"
scrolling="no"></iframe>
<iframe onLoad="resizeY()" name="responsedivyahoo" id="responsedivyahoo"
scrolling="no"></iframe>
<iframe onLoad="resizeB()" name="responsedivbing" id="responsedivbing"
scrolling="no"></iframe>

<iframe name="detailsframe" id="detailsframe" class="dockclass"></iframe>

<script type="text/javascript">
var dock0=new dockit("detailsframe", 0);
</script>

</body>

</html>

bParser.php

<html>
<head>


<style>
body {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  text-decoration : bold;
  background : #f3f3f3;
}

a:hover {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #989898;
  text-decoration : bold;
}

a:visited, a:link, a:active {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  color : #000022;
  text-decoration : normal;
}
</style>


</head>

<body>

<b>Bing</b>
<br>Keyword: <?=htmlspecialchars(isset($_GET["keyword"]) ? $_GET["keyword"] : '') /* request data: escape for HTML */ ?>
<br>Showing <?=htmlspecialchars(isset($_GET["resultLimit"]) ? $_GET["resultLimit"] : '')?> results
<br><hr>
<?

/**
 * Returns the host part of an absolute URL (the text between "://" and the
 * next "/").
 *
 * @param string $url URL such as "http://www.example.com/page".
 * @return string Host part, or "" when $url contains no "://".
 */
function getBaseURL($url){
   // split() was removed in PHP 7; explode() performs the same literal split.
   $schemeAndRest = explode("://", (string) $url, 2);
   if (count($schemeAndRest) < 2) {
      // Relative link or empty href: there is no host to report.
      return "";
   }
   $hostAndPath = explode("/", $schemeAndRest[1], 2);
   return $hostAndPath[0];
}


/**
 * Downloads one Bing search results page for a keyword.
 *
 * @param string $mykeyword Search phrase as typed by the user.
 * @param int    $myindex   Value sent as Bing's "first" query parameter.
 * @return string|false Page HTML, or false when the cURL transfer fails.
 */
function getBingSERP($mykeyword, $myindex){
    // ereg_replace() was removed in PHP 7. Normalise every whitespace
    // character to a space, then urlencode(): spaces still become "+", and
    // characters such as "&" or "#" can no longer break the query string.
    $query = urlencode(preg_replace('/\s/', ' ', (string) $mykeyword));

    $url = "http://www.bing.com/search?q=".$query."&first=".urlencode((string) $myindex)."&";

    // Forward the visitor's User-Agent; it may be absent (e.g. CLI requests).
    $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // NOTE(review): "c:cookie.txt" is a drive-relative Windows path - confirm
    // the intended cookie file location.
    curl_setopt($ch, CURLOPT_COOKIEFILE, "c:cookie.txt");
    curl_setopt($ch, CURLOPT_USERAGENT, $client);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}


/**
 * Prints the result links found in one Bing results page and returns the
 * running total of printed links.
 *
 * Every <a> below <body> is examined. Links pointing at live.com, msn.c* or
 * microsoft.com, links that are not http/ftp, and hosts already seen on this
 * page are skipped. Each remaining link is printed as an anchor that opens
 * kw.php in the "detailsframe" frame, until $rowLimit links have been printed.
 * The keyword used for highlighting and for the kw.php link is read from
 * $_GET["keyword"].
 *
 * @param string|false $serp      Raw page HTML (false/empty after a failed fetch).
 * @param int          $masterCnt Number of links already printed by earlier calls.
 * @param int          $rowLimit  Maximum total number of links to print.
 * @return int Updated number of printed links.
 */
function processSERP($serp, $masterCnt, $rowLimit) {
   // Nothing to parse after a failed fetch; DOMDocument::loadHTML() also
   // rejects an empty string on PHP 8.
   if (!is_string($serp) || $serp === '') {
      return $masterCnt;
   }

   $keyword = isset($_GET["keyword"]) ? $_GET["keyword"] : "";

   $dom = new DOMDocument();
   @$dom->loadHTML($serp);   // SERP markup is rarely valid; silence parser warnings
   $xpath = new DOMXPath($dom);
   $hrefs = $xpath->evaluate("/html/body//a");

   $seen = array();   // hosts already encountered on this page (exact match)

   for ($i = 0; $i < $hrefs->length && $masterCnt < $rowLimit; $i++) {
      $href = $hrefs->item($i);
      $url = $href->getAttribute('href');
      $anchortext = $href->nodeValue;

      $host = getBaseURL($url);
      // Record the host for every anchor, even ones filtered out below, so a
      // later link to the same host is treated as a duplicate.
      $isNew = !isset($seen[$host]);
      $seen[$host] = true;

      // Skip Bing/Microsoft's own navigation links.
      if (preg_match('/live\.com/i', $url) ||
          preg_match('/msn\.c/i', $url) ||
          preg_match('/microsoft\.com/i', $url)) {
         continue;
      }
      // Only absolute http(s)/ftp links are search results.
      if (!preg_match('/^http/i', $url) && !preg_match('/^ftp/i', $url)) {
         continue;
      }
      if (!$isNew) {
         continue;
      }

      // Build the visible label: escaped host with each keyword word in bold.
      // spliti() was removed in PHP 7; explode() splits on the same literal space.
      $label = htmlspecialchars($host, ENT_QUOTES);
      foreach (explode(" ", $keyword) as $chunk) {
         if ($chunk === "") {
            continue;
         }
         $safeChunk = htmlspecialchars($chunk, ENT_QUOTES);
         $label = str_replace($safeChunk, '<B>'.$safeChunk.'</B>', $label);
      }

      // URL-encode the query values so "&" inside the target URL survives,
      // then HTML-escape everything placed inside attributes.
      $detailsHref = 'kw.php?url='.urlencode($url).'&keyword='.urlencode($keyword);
      echo "<a target=detailsframe href='".htmlspecialchars($detailsHref, ENT_QUOTES)
         ."' title='".htmlspecialchars($anchortext, ENT_QUOTES)."'>"
         .$label."</a><br>";
      $masterCnt++;
   }
   return $masterCnt;
}

// Driver: fetch successive Bing result pages (10 results apart) until
// $rowLimit links have been printed or $maxPages pages have been tried.
$rowLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 10;
$keyword = isset($_GET["keyword"]) ? $_GET["keyword"] : "";

$masterCnt = 0;   // links printed so far
$next = 1;        // value sent as Bing's "first" parameter
$maxPages = 5;    // one initial fetch plus up to four follow-up pages

for ($page = 0; $page < $maxPages && $masterCnt < $rowLimit; $page++) {
   if ($page > 0) {
      // Pause briefly between requests, then advance to the next page.
      // Every follow-up stage now really requests a new offset (the original
      // re-processed or re-fetched an already seen page in two of its stages).
      sleep(rand(1, 3));
      $next = $next + 10;
   }
   $serpRes = getBingSERP($keyword, $next);
   $masterCnt = processSERP($serpRes, $masterCnt, $rowLimit);
   flush();   // push the links printed so far to the browser
}

?>


</body>
</html>

gParser.php

<html>

<head>

<style>
body {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  text-decoration : bold;
  background : #f3f3f3;
}

a:hover {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #989898;
  text-decoration : bold;
}

a:visited, a:link, a:active {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  color : #000022;
  text-decoration : normal;
}
</style>

</head>
<body>

<b>Google</b>
<?php
// The request parameters are user-controlled, so they are HTML-escaped before
// being echoed back into the page (prevents reflected XSS).
$shownKeyword = htmlspecialchars(isset($_GET["keyword"]) ? $_GET["keyword"] : "", ENT_QUOTES);
$shownLimit = htmlspecialchars(isset($_GET["resultLimit"]) ? $_GET["resultLimit"] : "", ENT_QUOTES);
?>
<br>Keyword: <?=$shownKeyword?>
<br>Showing <?=$shownLimit?> results
<br><hr>
<?php
/**
 * Returns the host part of an absolute URL (the text between "://" and the
 * next "/").
 *
 * @param string $url URL such as "http://www.example.com/page".
 * @return string Host part, or "" when $url contains no "://".
 */
function getBaseURL($url){
   // split() was removed in PHP 7; explode() performs the same literal split.
   $schemeAndRest = explode("://", (string) $url, 2);
   if (count($schemeAndRest) < 2) {
      // Relative link or empty href: there is no host to report.
      return "";
   }
   $hostAndPath = explode("/", $schemeAndRest[1], 2);
   return $hostAndPath[0];
}

/**
 * Downloads a Google search results page (50 results requested) for a keyword.
 *
 * @param string $mykeyword Search phrase as typed by the user.
 * @return string|false Page HTML, or false when the cURL transfer fails.
 */
function getGoogleSERP($mykeyword){

    // ereg_replace() was removed in PHP 7. Normalise every whitespace
    // character to a space, then urlencode(): spaces still become "+", and
    // characters such as "&" or "#" can no longer break the query string.
    $query = urlencode(preg_replace('/\s/', ' ', (string) $mykeyword));

    // The original appended a stray "." to the query ("q=<keyword>.&num=50");
    // it is dropped so the search term is sent unchanged.
    $url = "http://www.google.com/search?q=".$query."&num=50&";

    // Forward the visitor's User-Agent; it may be absent (e.g. CLI requests).
    $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $client);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}


// Driver: fetch one Google results page and print up to $rowLimit distinct
// result hosts as links that open kw.php in the "detailsframe" frame.
$rowLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 10;

$keyword = isset($_GET["keyword"]) ? $_GET["keyword"] : "";
$serp = getGoogleSERP($keyword);

$dom = new DOMDocument();
// A failed fetch yields false/empty, which loadHTML() rejects on PHP 8; the
// empty document then simply contains no links.
if (is_string($serp) && $serp !== '') {
   @$dom->loadHTML($serp);   // SERP markup is rarely valid; silence parser warnings
}
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");
$seen = array();   // hosts already encountered (exact match)
$intCnt = 0;       // links printed so far

for ($i = 0; $i < $hrefs->length && $intCnt < $rowLimit; $i++) {
   $href = $hrefs->item($i);
   $url = $href->getAttribute('href');
   $anchortext = $href->nodeValue;

   $host = getBaseURL($url);
   // Record the host for every anchor, even ones filtered out below, so a
   // later link to the same host is treated as a duplicate.
   $isNew = !isset($seen[$host]);
   $seen[$host] = true;

   // Skip Google's own links, YouTube, site-relative links and cache links.
   // ('#^/#' replaces the broken pattern "/^//i", which PCRE rejects.)
   if (preg_match('/google\.com/i', $url) ||
       preg_match('/youtube\.com/i', $url) ||
       preg_match('#^/#', $url) ||
       preg_match('/cache:/i', $url)) {
      continue;
   }
   // Only absolute http(s)/ftp links are search results.
   if (!preg_match('/^http/i', $url) && !preg_match('/^ftp/i', $url)) {
      continue;
   }
   if (!$isNew) {
      continue;
   }

   // Build the visible label: escaped host with each keyword word in bold.
   // spliti() was removed in PHP 7; explode() splits on the same literal space.
   $label = htmlspecialchars($host, ENT_QUOTES);
   foreach (explode(" ", $keyword) as $chunk) {
      if ($chunk === "") {
         continue;
      }
      $safeChunk = htmlspecialchars($chunk, ENT_QUOTES);
      $label = str_replace($safeChunk, '<B>'.$safeChunk.'</B>', $label);
   }

   // URL-encode the query values so "&" inside the target URL survives,
   // then HTML-escape everything placed inside attributes.
   $detailsHref = 'kw.php?url='.urlencode($url).'&keyword='.urlencode($keyword);
   echo "<a target=detailsframe href='".htmlspecialchars($detailsHref, ENT_QUOTES)
      ."' title='".htmlspecialchars($anchortext, ENT_QUOTES)."'>"
      .$label."</a><br>";
   $intCnt++;
}

?>

</body>
</html>

yParser.php

<html>
<head>


<style>
body {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  text-decoration : bold;
  background : #f3f3f3;
}

a:hover {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  background : #989898;
  text-decoration : bold;
}

a:visited, a:link, a:active {
  font-weight : normal;
  font-size : 12px;
  font-family : helvetica;
  color : #000022;
  text-decoration : normal;
}
</style>


</head>

<body>

<b>Yahoo!</b>
<?php
// The request parameters are user-controlled, so they are HTML-escaped before
// being echoed back into the page (prevents reflected XSS).
$shownKeyword = htmlspecialchars(isset($_GET["keyword"]) ? $_GET["keyword"] : "", ENT_QUOTES);
$shownLimit = htmlspecialchars(isset($_GET["resultLimit"]) ? $_GET["resultLimit"] : "", ENT_QUOTES);
?>
<br>Keyword: <?=$shownKeyword?>
<br>Showing <?=$shownLimit?> results
<br><hr>
<?php

/**
 * Returns the host part of an absolute URL (the text between "://" and the
 * next "/").
 *
 * @param string $url URL such as "http://www.example.com/page".
 * @return string Host part, or "" when $url contains no "://".
 */
function getBaseURL($url){
   // split() was removed in PHP 7; explode() performs the same literal split.
   $schemeAndRest = explode("://", (string) $url, 2);
   if (count($schemeAndRest) < 2) {
      // Relative link or empty href: there is no host to report.
      return "";
   }
   $hostAndPath = explode("/", $schemeAndRest[1], 2);
   return $hostAndPath[0];
}


/**
 * Downloads a Yahoo! search results page (100 results requested) for a keyword.
 *
 * @param string $mykeyword Search phrase as typed by the user.
 * @return string|false Page HTML, or false when the cURL transfer fails.
 */
function getYahooSERP($mykeyword){

    // ereg_replace() was removed in PHP 7. Normalise every whitespace
    // character to a space, then urlencode(): spaces still become "+", and
    // characters such as "&" or "#" can no longer break the query string.
    $query = urlencode(preg_replace('/\s/', ' ', (string) $mykeyword));

    // The original appended a stray "." to the query ("p=<keyword>.&n=100");
    // it is dropped so the search term is sent unchanged.
    $url = "http://search.yahoo.com/search;_ylt=?p=".$query."&n=100&";

    // Forward the visitor's User-Agent; it may be absent (e.g. CLI requests).
    $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_REFERER, "http://search.yahoo.com/");
    curl_setopt($ch, CURLOPT_USERAGENT, $client);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}

// Driver: fetch one Yahoo! results page and print up to $rowLimit distinct
// result hosts as links that open kw.php in the "detailsframe" frame.
$rowLimit = isset($_GET["resultLimit"]) ? (int) $_GET["resultLimit"] : 10;

$keyword = isset($_GET["keyword"]) ? $_GET["keyword"] : "";
$serp = getYahooSERP($keyword);

$dom = new DOMDocument();
// A failed fetch yields false/empty, which loadHTML() rejects on PHP 8; the
// empty document then simply contains no links.
if (is_string($serp) && $serp !== '') {
   @$dom->loadHTML($serp);   // SERP markup is rarely valid; silence parser warnings
}
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");
$seen = array();   // target hosts already encountered (exact match)

$intCnt = 0;       // links printed so far
for ($i = 0; $i < $hrefs->length && $intCnt < $rowLimit; $i++) {
   $href = $hrefs->item($i);
   $url = $href->getAttribute('href');
   $anchortext = $href->nodeValue;

   // Only hrefs containing "**" are considered; the text after the first
   // "**" is URL-decoded and used as the real target. explode() replaces
   // split('**'), whose pattern is not a valid regular expression.
   $pieces = explode('**', $url, 2);
   if (count($pieces) < 2) {
      continue;
   }
   $tmpurl = urldecode($pieces[1]);

   $host = getBaseURL($tmpurl);
   // Record the host for every candidate, even ones filtered out below, so a
   // later link to the same host is treated as a duplicate.
   $isNew = !isset($seen[$host]);
   $seen[$host] = true;

   // Skip Yahoo!'s own targets and cache links. The host is tested before
   // highlighting so a keyword such as "yahoo" cannot defeat the filter.
   if (preg_match('/yahoo\.com/i', $host) || preg_match('/cache/i', $url)) {
      continue;
   }
   // Only absolute http(s)/ftp hrefs are processed.
   if (!preg_match('/^http/i', $url) && !preg_match('/^ftp/i', $url)) {
      continue;
   }
   if (!$isNew) {
      continue;
   }

   // Build the visible label: escaped host with each keyword word in bold.
   // spliti() was removed in PHP 7; explode() splits on the same literal space.
   $label = htmlspecialchars($host, ENT_QUOTES);
   foreach (explode(" ", $keyword) as $chunk) {
      if ($chunk === "") {
         continue;
      }
      $safeChunk = htmlspecialchars($chunk, ENT_QUOTES);
      $label = str_replace($safeChunk, '<B>'.$safeChunk.'</B>', $label);
   }

   // URL-encode the query values so "&" inside the target URL survives,
   // then HTML-escape everything placed inside attributes.
   $detailsHref = 'kw.php?url='.urlencode($tmpurl).'&keyword='.urlencode($keyword);
   echo "<a target=detailsframe href='".htmlspecialchars($detailsHref, ENT_QUOTES)
      ."' title='".htmlspecialchars($anchortext, ENT_QUOTES)."'>"
      .$label."</a><br>";
   $intCnt++;
}

?>
</body>
</html>
..................Content has been hidden....................

You can't read all pages of this ebook; please click here to log in and view all pages.
Reset
3.146.107.89