Skip to content

Commit efc6447

Browse files
authored
Throttle requests (#65)
* Fix styling * Throttle requests * Fix styling --------- Co-authored-by: Baspa <[email protected]>
1 parent 1f72d97 commit efc6447

27 files changed

+191
-148
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Easily configure which routes to scan, exclude or include specific checks or eve
2929
- [Scanning routes](#scanning-routes)
3030
- [Scanning a single route](#scanning-a-single-route)
3131
- [Scanning routes in an SPA application](#scanning-routes-in-an-spa-application)
32+
- [Throttling](#throttling)
3233
- [Scan model urls](#scan-model-urls)
3334
- [Saving scans into the database](#saving-scans-into-the-database)
3435
- [Listening to events](#listening-to-events)
@@ -186,6 +187,17 @@ php artisan seo:scan-url https://vormkracht10.nl --javascript
186187

187188
> Note: This command will use Puppeteer to render the page. Make sure that you have Puppeteer installed on your system. You can install Puppeteer by running the following command: `npm install puppeteer`. **At this moment it's only available when scanning single routes.**
188189
190+
### Throttling
191+
192+
To throttle the requests, set the `enabled` key of the `throttle` option to `true` in the config file. Set the maximum number of requests per minute with the `requests_per_minute` key of the same option (it is `null` in the default config, so give it a value when you enable throttling).
193+
194+
```php
195+
'throttle' => [
196+
'enabled' => false,
197+
'requests_per_minute' => 10,
198+
],
199+
```
200+
189201
### Scan model urls
190202

191203
When you have an application where you have a lot of pages which are related to a model, you can save the SEO score to the model. This way you can check the SEO score of a specific page and show it in your application.

config/seo.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@
9090
'vapor-ui/*',
9191
],
9292

93+
'throttle' => [
94+
'enabled' => false,
95+
'requests_per_minute' => null,
96+
],
97+
9398
/*
9499
|--------------------------------------------------------------------------
95100
| Domains (DNS resolving)

src/Commands/SeoScan.php

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,52 @@ public function handle(): int
9494
/**
 * Run the SEO check for every route returned by getRoutes(), optionally
 * limiting how many checks are started per minute.
 *
 * Throttling is controlled by two config values:
 *  - `seo.throttle.enabled`: truthy to turn throttling on;
 *  - `seo.throttle.requests_per_minute`: maximum checks per 60-second window.
 *
 * The limit is only enforced when it is a positive number. A missing,
 * non-numeric, zero or negative value means "no limit". It must never be
 * compared as the display string 'N/A': comparing an int with a non-numeric
 * string is not a numeric comparison in PHP 8, so the limit would silently
 * never trigger.
 */
private function calculateScoreForRoutes(): void
{
    $routes = self::getRoutes();
    $throttleEnabled = (bool) config('seo.throttle.enabled');
    $configuredLimit = config('seo.throttle.requests_per_minute');

    // Normalise the limit to a positive int or null (no enforceable limit).
    // is_numeric() also accepts numeric strings such as "10".
    $maxRequests = is_numeric($configuredLimit) && (int) $configuredLimit > 0
        ? (int) $configuredLimit
        : null;

    if ($throttleEnabled) {
        // 'N/A' is used for display only; the throttle logic below uses $maxRequests.
        $this->line('<fg=yellow>Throttling enabled. Maximum requests per minute: '.($maxRequests ?? 'N/A').'</>');
        // NOTE(review): presumably a pause so the operator can read the notice — confirm it is still wanted.
        sleep(5);
    }

    $shouldThrottle = $throttleEnabled && $maxRequests !== null;
    $requestCount = 0;
    // Start the first window after the notice pause so the pause does not shorten it.
    $startTime = time();

    // $path is unused but required: each() passes the value first and the key (route name) second.
    $routes->each(function ($path, $name) use ($shouldThrottle, $maxRequests, &$requestCount, &$startTime) {
        $this->progress->start();

        if ($shouldThrottle) {
            $elapsedTime = time() - $startTime;

            if ($elapsedTime >= 60) {
                // The current window has expired: open a fresh one.
                $requestCount = 0;
                $startTime = time();
            } elseif ($requestCount >= $maxRequests) {
                // Budget for this window is used up: wait for the window to end.
                sleep(60 - $elapsedTime);
                $requestCount = 0;
                $startTime = time();
            }

            $requestCount++;
        }

        $this->performSeoCheck($name);
        $this->progress->finish();
    });
}
116127

128+
/**
 * Run the SEO check for one named route and record the outcome.
 *
 * Adds the failed and successful check counts to the running totals,
 * increments the scanned-route counter, saves the score when
 * `seo.database.save` is truthy, and logs the result to the console.
 *
 * @param  mixed  $name  Route name handed to route() to build the URL to scan.
 */
private function performSeoCheck($name): void
{
    $url = route($name);
    $useJavascript = config('seo.javascript');

    $result = Seo::check(url: $url, progress: $this->progress, useJavascript: $useJavascript);

    $failedCount = count($result->getFailedChecks());
    $successCount = count($result->getSuccessfulChecks());

    $this->failed += $failedCount;
    $this->success += $successCount;
    $this->routeCount++;

    $shouldPersist = config('seo.database.save');

    if ($shouldPersist) {
        $this->saveScoreToDatabase(seo: $result, url: $url);
    }

    $this->logResultToConsole($result, $url);
}
142+
117143
private static function getRoutes(): Collection
118144
{
119145
$routes = collect(app('router')->getRoutes()->getRoutesByName())

tests/Checks/Configuration/NoFollowCheckTest.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Configuration\NoFollowCheck;
66

77
it('can perform the nofollow check with robots tag', function () {
8-
$check = new NoFollowCheck();
9-
$crawler = new Crawler();
8+
$check = new NoFollowCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response('', 200, ['X-Robots-Tag' => 'nofollow']),
@@ -18,8 +18,8 @@
1818
});
1919

2020
it('can perform the nofollow check with robots metatag', function () {
21-
$check = new NoFollowCheck();
22-
$crawler = new Crawler();
21+
$check = new NoFollowCheck;
22+
$crawler = new Crawler;
2323

2424
Http::fake([
2525
'vormkracht10.nl' => Http::response('<html><head><meta name="robots" content="nofollow"></head></html>', 200),
@@ -31,8 +31,8 @@
3131
});
3232

3333
it('can perform the nofollow check with googlebot metatag', function () {
34-
$check = new NoFollowCheck();
35-
$crawler = new Crawler();
34+
$check = new NoFollowCheck;
35+
$crawler = new Crawler;
3636

3737
Http::fake([
3838
'vormkracht10.nl' => Http::response('<html><head><meta name="googlebot" content="nofollow"></head></html>', 200),

tests/Checks/Configuration/NoIndexCheckTest.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Configuration\NoIndexCheck;
66

77
it('can perform the noindex check with robots tag', function () {
8-
$check = new NoIndexCheck();
9-
$crawler = new Crawler();
8+
$check = new NoIndexCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response('', 200, ['X-Robots-Tag' => 'noindex']),
@@ -18,8 +18,8 @@
1818
});
1919

2020
it('can perform the noindex check with robots metatag', function () {
21-
$check = new NoIndexCheck();
22-
$crawler = new Crawler();
21+
$check = new NoIndexCheck;
22+
$crawler = new Crawler;
2323

2424
Http::fake([
2525
'vormkracht10.nl' => Http::response('<html><head><meta name="robots" content="noindex"></head></html>', 200),
@@ -31,8 +31,8 @@
3131
});
3232

3333
it('can perform the noindex check with googlebot metatag', function () {
34-
$check = new NoIndexCheck();
35-
$crawler = new Crawler();
34+
$check = new NoIndexCheck;
35+
$crawler = new Crawler;
3636

3737
Http::fake([
3838
'vormkracht10.nl' => Http::response('<html><head><meta name="googlebot" content="noindex"></head></html>', 200),

tests/Checks/Configuration/RobotsCheckTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
use Vormkracht10\Seo\Checks\Configuration\RobotsCheck;
66

77
// The robots check should pass for a site whose robots.txt only disallows /admin for Googlebot.
it('can perform the robots check', function () {
    $robotsCheck = new RobotsCheck;

    // Fake the robots.txt endpoint so no real HTTP request is made for it.
    $fakedResponses = [
        'vormkracht10.nl/robots.txt' => Http::response('User-agent: Googlebot
Disallow: /admin', 200),
    ];
    Http::fake($fakedResponses);

    $response = Http::get('vormkracht10.nl');
    $passed = $robotsCheck->check($response, new Crawler);

    $this->assertTrue($passed);
});

tests/Checks/Content/AltTagCheckTest.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Content\AltTagCheck;
66

77
it('can perform the alt tag check with alt', function () {
8-
$check = new AltTagCheck();
9-
$crawler = new Crawler();
8+
$check = new AltTagCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/images/logo.png" width="5" height="5" alt="Vormkracht10 logo"></body></html>', 200),
@@ -18,8 +18,8 @@
1818
});
1919

2020
it('can perform the alt tag check without alt', function () {
21-
$check = new AltTagCheck();
22-
$crawler = new Crawler();
21+
$check = new AltTagCheck;
22+
$crawler = new Crawler;
2323

2424
Http::fake([
2525
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/images/logo.png" width="5" height="5"></body></html>', 200),
@@ -31,8 +31,8 @@
3131
});
3232

3333
it('can perform the alt tag check with empty alt', function () {
34-
$check = new AltTagCheck();
35-
$crawler = new Crawler();
34+
$check = new AltTagCheck;
35+
$crawler = new Crawler;
3636

3737
Http::fake([
3838
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/images/logo.png" width="5" height="5" alt=""></body></html>', 200),

tests/Checks/Content/BrokenImageCheckTest.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Content\BrokenImageCheck;
66

77
it('can perform the broken image check on broken images', function () {
8-
$check = new BrokenImageCheck();
9-
$crawler = new Crawler();
8+
$check = new BrokenImageCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/404"></body></html>', 200),
@@ -18,8 +18,8 @@
1818
});
1919

2020
it('can perform the broken image check on working images', function () {
21-
$check = new BrokenImageCheck();
22-
$crawler = new Crawler();
21+
$check = new BrokenImageCheck;
22+
$crawler = new Crawler;
2323

2424
Http::fake([
2525
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl"></body></html>', 200),
@@ -31,8 +31,8 @@
3131
});
3232

3333
it('can perform the broken image check on content where no images are used', function () {
34-
$check = new BrokenImageCheck();
35-
$crawler = new Crawler();
34+
$check = new BrokenImageCheck;
35+
$crawler = new Crawler;
3636

3737
Http::fake([
3838
'vormkracht10.nl' => Http::response('<html><head></head><body></body></html>', 200),

tests/Checks/Content/BrokenLinkCheckTest.php

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Content\BrokenLinkCheck;
66

77
it('can perform the broken link check on broken links', function () {
8-
$check = new BrokenLinkCheck();
9-
$crawler = new Crawler();
8+
$check = new BrokenLinkCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl/404">Vormkracht10</a></body></html>', 200),
@@ -18,8 +18,8 @@
1818
});
1919

2020
it('can perform the broken link check on working links', function () {
21-
$check = new BrokenLinkCheck();
22-
$crawler = new Crawler();
21+
$check = new BrokenLinkCheck;
22+
$crawler = new Crawler;
2323

2424
Http::fake([
2525
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl">Vormkracht10</a></body></html>', 200),
@@ -31,8 +31,8 @@
3131
});
3232

3333
it('can perform the broken link check on content where no links are used', function () {
34-
$check = new BrokenLinkCheck();
35-
$crawler = new Crawler();
34+
$check = new BrokenLinkCheck;
35+
$crawler = new Crawler;
3636

3737
Http::fake([
3838
'vormkracht10.nl' => Http::response('<html><head></head><body></body></html>', 200),
@@ -44,8 +44,8 @@
4444
});
4545

4646
it('can run the broken link check on a relative url', function () {
47-
$check = new BrokenLinkCheck();
48-
$crawler = new Crawler();
47+
$check = new BrokenLinkCheck;
48+
$crawler = new Crawler;
4949

5050
Http::fake([
5151
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="/404">Vormkracht10</a></body></html>', 200),
@@ -59,8 +59,8 @@
5959
it('can bypass DNS layers using DNS resolving', function () {
6060
$this->markTestSkipped('This test is skipped because we cannot fake DNS resolving.');
6161

62-
$check = new BrokenLinkCheck();
63-
$crawler = new Crawler();
62+
$check = new BrokenLinkCheck;
63+
$crawler = new Crawler;
6464

6565
Http::fake([
6666
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl">Vormkracht10</a></body></html>', 200),
@@ -76,8 +76,8 @@
7676
});
7777

7878
it('cannot bypass DNS layers using a fake IP when DNS resolving', function () {
79-
$check = new BrokenLinkCheck();
80-
$crawler = new Crawler();
79+
$check = new BrokenLinkCheck;
80+
$crawler = new Crawler;
8181

8282
config(['seo.resolve' => [
8383
'vormkracht10.nl' => '8.8.8.8',
@@ -93,8 +93,8 @@
9393
});
9494

9595
it('can check if link is broken by checking on configured status codes', function () {
96-
$check = new BrokenLinkCheck();
97-
$crawler = new Crawler();
96+
$check = new BrokenLinkCheck;
97+
$crawler = new Crawler;
9898

9999
config(['seo.broken_link_check.status_codes' => ['403']]);
100100

@@ -108,8 +108,8 @@
108108
});
109109

110110
it('can exclude certain paths from the broken link check', function () {
111-
$check = new BrokenLinkCheck();
112-
$crawler = new Crawler();
111+
$check = new BrokenLinkCheck;
112+
$crawler = new Crawler;
113113

114114
config(['seo.broken_link_check.exclude_links' => ['https://vormkracht10.nl/excluded']]);
115115

tests/Checks/Content/ContentLengthCheckTest.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Content\ContentLengthCheck;
66

77
it('can perform the content length check on content with a length of 2100 characters', function () {
8-
$check = new ContentLengthCheck();
9-
$crawler = new Crawler();
8+
$check = new ContentLengthCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response(
@@ -26,8 +26,8 @@
2626
});
2727

2828
it('can perform the content length check on content with less characters', function () {
29-
$check = new ContentLengthCheck();
30-
$crawler = new Crawler();
29+
$check = new ContentLengthCheck;
30+
$crawler = new Crawler;
3131

3232
Http::fake([
3333
'vormkracht10.nl' => Http::response(

tests/Checks/Content/KeywordInFirstParagraphCheckTest.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use Vormkracht10\Seo\Checks\Content\KeywordInFirstParagraphCheck;
66

77
it('can perform the keyword in first paragraph check on a page with the keyword in the first paragraph', function () {
8-
$check = new KeywordInFirstParagraphCheck();
9-
$crawler = new Crawler();
8+
$check = new KeywordInFirstParagraphCheck;
9+
$crawler = new Crawler;
1010

1111
Http::fake([
1212
'vormkracht10.nl' => Http::response('<html><head><meta name="keywords" content="vormkracht10, seo, laravel, package"></head><body><p>vormkracht10 is a great company that specializes in SEO and Laravel packages.</p></body></html>', 200),
@@ -18,8 +18,8 @@
1818
});
1919

2020
it('can perform the keyword in first paragraph check on a page without the keyword in the first paragraph', function () {
21-
$check = new KeywordInFirstParagraphCheck();
22-
$crawler = new Crawler();
21+
$check = new KeywordInFirstParagraphCheck;
22+
$crawler = new Crawler;
2323

2424
Http::fake([
2525
'vormkracht10.nl' => Http::response('<html><head><meta name="keywords" content="seo, laravel, package"></head><body><p>Lorem ipsum dolor sit amet.</p></body></html>', 200),
@@ -31,8 +31,8 @@
3131
});
3232

3333
it('can perform the keyword in first paragraph check on a page without keywords', function () {
34-
$check = new KeywordInFirstParagraphCheck();
35-
$crawler = new Crawler();
34+
$check = new KeywordInFirstParagraphCheck;
35+
$crawler = new Crawler;
3636

3737
Http::fake([
3838
'vormkracht10.nl' => Http::response('<html><head></head><body></body></html>', 200),

0 commit comments

Comments
 (0)