DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Snippets has posted 5883 posts at DZone. View Full User Profile

Export All Links From Any Page - PHP Code - Outbound/Inbound Links Checker

02.19.2012
| 5316 views |
  • submit to reddit
        This code lists all internal and external links found on a page, showing each link individually along with a count for each group. It is used by <a href="http://www.link-checker.org/" rel="dofollow" title="link checker">link checker</a> as its outbound-link checking option.


<!-- URL entry form: has no action attribute, so it posts back to this same page; the PHP below reads the "url" and "submit" fields from $_POST. -->
<form method="post">
URL here: <input type="text" name="url" value="http://" /><br />
<input type="submit" name="submit" value="submit" />
</form>
    
    <?php
    /**
     * Retrieve the link destinations (href values) of a page, split into
     * internal and external links.
     *
     * The page is fetched with file_get_contents() and every quoted href="..."
     * value is extracted with a regular expression, so href attributes of any
     * tag are picked up. Each destination is then classified by comparing its
     * host with the host of $url: destinations without a host (relative paths,
     * fragments, mailto: and similar) and destinations on the same host or one
     * of its subdomains are internal, everything else is external.
     *
     * @param string $url Absolute http:// or https:// URL of the page to scan.
     *
     * @return array array('ExternalLinks' => array(...), 'InternalLinks' => array(...)).
     *               Both keys always hold arrays; both are empty when $url is not
     *               a usable http(s) URL or the page cannot be fetched.
     */
    function get_a_href($url){
        $LinksResultsInArray = array(
            'ExternalLinks' => array(),
            'InternalLinks' => array()
        );

        if (!is_string($url)) {
            return $LinksResultsInArray;
        }
        $url = trim($url);

        // The URL comes straight from user input: only allow real web pages so
        // that file://, php:// and other stream wrappers can never be opened.
        $UrlParts = parse_url($url);
        if (!is_array($UrlParts)
            || !isset($UrlParts['scheme'], $UrlParts['host'])
            || $UrlParts['host'] === '') {
            return $LinksResultsInArray;
        }
        $Scheme = strtolower($UrlParts['scheme']);
        if ($Scheme !== 'http' && $Scheme !== 'https') {
            return $LinksResultsInArray;
        }
        $DomainName = get_a_href_normalize_host($UrlParts['host']);

        // "@" only keeps a failed request from printing a warning into the
        // page; the failure itself is handled by the explicit check below.
        $file = @file_get_contents($url);
        if ($file === false || $file === '') {
            return $LinksResultsInArray;
        }

        // \1 forces the closing quote to be the same character as the opening one.
        if (!preg_match_all('/href\s*=\s*(["\'])(.*?)\1/i', $file, $patterns)) {
            return $LinksResultsInArray;
        }

        $SubdomainSuffix = '.' . $DomainName;

        foreach ($patterns[2] as $Link) {
            if ($Link === '' || $Link === '#') {
                continue;
            }
            // Script pseudo-links are not navigable destinations.
            if (stripos(ltrim($Link), 'javascript:') === 0) {
                continue;
            }

            // Report links without their query string. A link that is only a
            // query string ("?page=2") is kept whole so it does not become "".
            $QueryStart = strpos($Link, '?');
            if ($QueryStart !== false && $QueryStart > 0) {
                $Link = substr($Link, 0, $QueryStart);
            }

            $LinkHost = parse_url($Link, PHP_URL_HOST);
            if (!is_string($LinkHost) || $LinkHost === '') {
                // No host part: relative path, fragment, mailto:, ...
                $LinksResultsInArray['InternalLinks'][] = $Link;
                continue;
            }

            $LinkHost = get_a_href_normalize_host($LinkHost);
            $IsSameSite = $LinkHost === $DomainName
                || substr($LinkHost, -strlen($SubdomainSuffix)) === $SubdomainSuffix;

            if ($IsSameSite) {
                $LinksResultsInArray['InternalLinks'][] = $Link;
            } else {
                $LinksResultsInArray['ExternalLinks'][] = $Link;
            }
        }

        return $LinksResultsInArray;
    }

    /**
     * Lower-case a host name and drop a leading "www." so that
     * "WWW.Example.com" and "example.com" compare as the same site.
     *
     * @param string $host Host name as returned by parse_url().
     *
     * @return string Normalised host name.
     */
    function get_a_href_normalize_host($host){
        $host = strtolower($host);
        if (strpos($host, 'www.') === 0) {
            $host = substr($host, 4);
        }
        return $host;
    }

    // Form handler: when the form above was submitted, scan the requested page
    // and print its external and internal links, each list with its count.
    if(isset($_POST['submit']) && $_POST['submit'] === 'submit')
    {
        // Anything other than a non-empty string is treated as "no URL given".
        $url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
        $linksInArray = ($url !== '') ? get_a_href($url) : array();

        // A missing or non-array list is normalised to an empty array so that
        // count() and foreach below are always safe.
        $ExternalLinks = (isset($linksInArray['ExternalLinks']) && is_array($linksInArray['ExternalLinks']))
            ? $linksInArray['ExternalLinks']
            : array();
        $InternalLinks = (isset($linksInArray['InternalLinks']) && is_array($linksInArray['InternalLinks']))
            ? $linksInArray['InternalLinks']
            : array();
        $CountOfExternalLink = count($ExternalLinks);
        $CountOfInternalLink = count($InternalLinks);

        echo "<h1>Linking structure</h1>";

        // Both lists are rendered the same way; only the label differs.
        $Sections = array(
            'External' => $ExternalLinks,
            'Internal' => $InternalLinks
        );
        foreach ($Sections as $Label => $Links) {
            if (!empty($Links)) {
                echo "<br/>" . $Label . " Links found: (" . count($Links) . ")<ul>";
                foreach ($Links as $val) {
                    // The link text comes from a remote page: escape it for HTML output.
                    echo "<li>" . htmlspecialchars($val, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</li>";
                }
                echo "</ul>";
            } else {
                echo "<br/><div class=\"error\">No " . $Label . " Links found</div><br/>";
            }
        }
    }
    ?>