/**
* Known robot user agent substrings. Key is user agent substring, value is robot key name.
*
* There's a great list here: http://user-agent-string.info/list-of-ua/bots
*
* @var array
*/
protected $_knownRobots = array(
    // All substrings are lower case; presumably they are compared against a
    // lower-cased user agent string -- confirm against the lookup code.
    'a6-indexer' => 'a6-indexer',
    // The Internet Archive crawler token is "archive.org_bot" (see the
    // 'archive.org' link in $_robotMap); the previous "archive.org_box"
    // spelling could never match.
    'archive.org_bot' => 'archive.org',
    'ahrefsbot' => 'ahrefsbot',
    'baiduspider' => 'baidu',
    'bingbot' => 'bing',
    'facebookexternalhit' => 'facebookextern',
    'googlebot' => 'google',
    // NOTE(review): "googlebot" is itself a substring of this key, so this
    // entry looks redundant if matching is substring-based -- kept as-is.
    'googlebot-mobile' => 'google',
    'grapeshotcrawler' => 'grapeshot',
    'mediapartners-google' => 'google',
    // Google's ads crawler identifies as "AdsBot-Google"; the previous
    // "adsnot-google" spelling was a typo and could never match.
    'adsbot-google' => 'google',
    'ia_archiver' => 'alexa',
    'linkdexbot' => 'linkdexbot',
    'magpie-crawler' => 'brandwatch',
    'mj12bot' => 'mj12',
    'msnbot' => 'msnbot',
    'netseer crawler' => 'netseer',
    'proximic' => 'proximic',
    'scoutjet' => 'scoutjet',
    'sogou web spider' => 'sogou',
    'yahoo! slurp' => 'yahoo',
    'yandex' => 'yandex',
    // Disabled generic substrings, left here for reference only.
    /*'crawler',
    'php/',
    'zend_http_client',*/
);
/**
* Maps a robot key to info about it (a display title and a reference link).
*
* @var array
*/
protected $_robotMap = array(
    // Each entry is keyed by a robot key name and carries two strings:
    //   'title' - a display name for the robot
    //   'link'  - a URL with information about the robot ('' for 'unknown')
    'a6-indexer' => array(
        'title' => 'A6-Indexer',
        'link'  => 'http://www.a6corp.com/a6-web-scraping-policy',
    ),
    'ahrefsbot' => array(
        'title' => 'Ahrefs',
        'link'  => 'http://ahrefs.com/robot/',
    ),
    'alexa' => array(
        'title' => 'Alexa',
        'link'  => 'http://www.alexa.com/help/webmasters',
    ),
    'archive.org' => array(
        'title' => 'Internet Archive',
        'link'  => 'http://www.archive.org/details/archive.org_bot',
    ),
    'baidu' => array(
        'title' => 'Baidu',
        'link'  => 'http://www.baidu.com/search/spider.htm',
    ),
    'bing' => array(
        'title' => 'Bing',
        'link'  => 'http://www.bing.com/bingbot.htm',
    ),
    'brandwatch' => array(
        'title' => 'Brandwatch',
        'link'  => 'http://www.brandwatch.com/how-it-works/gathering-data/',
    ),
    'facebookextern' => array(
        'title' => 'Facebook',
        'link'  => 'http://www.facebook.com/externalhit_uatext.php',
    ),
    'google' => array(
        'title' => 'Google',
        'link'  => 'http://support.google.com/webmasters/bin/answer.py?hl=en&answer=1061943',
    ),
    'grapeshot' => array(
        'title' => 'GrapeshotCrawler',
        'link'  => 'http://www.grapeshot.co.uk/crawler.php',
    ),
    'linkdexbot' => array(
        'title' => 'Linkdexbot',
        'link'  => 'http://www.linkdex.com/about/bots',
    ),
    'mj12' => array(
        'title' => 'Majestic-12',
        'link'  => 'http://majestic12.co.uk/bot.php',
    ),
    'msnbot' => array(
        'title' => 'MSN',
        'link'  => 'http://search.msn.com/msnbot.htm',
    ),
    'netseer' => array(
        'title' => 'NetSeer',
        'link'  => 'http://www.netseer.com/crawler.html',
    ),
    'proximic' => array(
        'title' => 'Proximic',
        'link'  => 'http://www.proximic.com/info/spider.php',
    ),
    'scoutjet' => array(
        'title' => 'Blekko',
        'link'  => 'http://www.scoutjet.com/',
    ),
    'sogou' => array(
        'title' => 'Sogou',
        'link'  => 'http://www.sogou.com/docs/help/webmasters.htm#07',
    ),
    // Placeholder entry with no reference link.
    'unknown' => array(
        'title' => 'Unknown',
        'link'  => '',
    ),
    'yahoo' => array(
        'title' => 'Yahoo',
        'link'  => 'http://help.yahoo.com/help/us/ysearch/slurp',
    ),
    'yandex' => array(
        'title' => 'Yandex',
        'link'  => 'http://help.yandex.com/search/?id=1112030',
    ),
);