Skip to content

Commit 622998f

Browse files
authored
Merge pull request #4156 from seleniumbase/more-stealth-features
More Stealth Features (and more)
2 parents f6f6e6f + b480a8b commit 622998f

32 files changed

+1386
-176
lines changed

examples/cdp_mode/ReadMe.md

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,10 +370,15 @@ with SB(uc=True, test=True, locale="en", pls="none") as sb:
370370

371371
```python
372372
sb.cdp.get(url, **kwargs)
373-
sb.cdp.open(url, **kwargs)
373+
sb.cdp.open(url, **kwargs) # Same as sb.cdp.get(url, **kwargs)
374374
sb.cdp.reload(ignore_cache=True, script_to_evaluate_on_load=None)
375375
sb.cdp.refresh(*args, **kwargs)
376376
sb.cdp.get_event_loop()
377+
sb.cdp.get_rd_host() # Returns the remote-debugging host
378+
sb.cdp.get_rd_port() # Returns the remote-debugging port
379+
sb.cdp.get_rd_url() # Returns the remote-debugging URL
380+
sb.cdp.get_endpoint_url() # Same as sb.cdp.get_rd_url()
381+
sb.cdp.get_port() # Same as sb.cdp.get_rd_port()
377382
sb.cdp.add_handler(event, handler)
378383
sb.cdp.find_element(selector, best_match=False, timeout=None)
379384
sb.cdp.find(selector, best_match=False, timeout=None)
@@ -487,6 +492,7 @@ sb.cdp.set_attributes(selector, attribute, value)
487492
sb.cdp.is_attribute_present(selector, attribute, value=None)
488493
sb.cdp.is_online()
489494
sb.cdp.solve_captcha()
495+
sb.cdp.click_captcha()
490496
sb.cdp.gui_press_key(key)
491497
sb.cdp.gui_press_keys(keys)
492498
sb.cdp.gui_write(text)
@@ -612,6 +618,69 @@ sb.driver.stop()
612618

613619
--------
614620

621+
### 🐙 <b translate="no">CDP Mode</b> Async API / Methods
622+
623+
```python
624+
await get(url="about:blank")
625+
await open(url="about:blank")
626+
await find(text, best_match=False, timeout=10) # `text` can be a selector
627+
await find_all(text, timeout=10) # `text` can be a selector
628+
await select(selector, timeout=10)
629+
await select_all(selector, timeout=10, include_frames=False)
630+
await query_selector(selector)
631+
await query_selector_all(selector)
632+
await find_element_by_text(text, best_match=False)
633+
await find_elements_by_text(text)
634+
await reload(ignore_cache=True, script_to_evaluate_on_load=None)
635+
await evaluate(expression)
636+
await js_dumps(obj_name)
637+
await back()
638+
await forward()
639+
await get_window()
640+
await get_content()
641+
await maximize()
642+
await minimize()
643+
await fullscreen()
644+
await medimize()
645+
await set_window_size(left=0, top=0, width=1280, height=1024)
646+
await set_window_rect(left=0, top=0, width=1280, height=1024)
647+
await activate()
648+
await bring_to_front()
649+
await set_window_state(left=0, top=0, width=1280, height=720, state="normal")
650+
await get_navigation_history()
651+
await open_external_inspector() # Open a separate browser for debugging
652+
await close()
653+
await scroll_down(amount=25)
654+
await scroll_up(amount=25)
655+
await wait_for(selector="", text="", timeout=10)
656+
await download_file(url, filename=None)
657+
await save_screenshot(filename="auto", format="png", full_page=False)
658+
await print_to_pdf(filename="auto")
659+
await set_download_path(path)
660+
await get_all_linked_sources()
661+
await get_all_urls(absolute=True)
662+
await get_html()
663+
await get_page_source()
664+
await is_element_present(selector)
665+
await is_element_visible(selector)
666+
await get_element_rect(selector, timeout=5) # (relative to window)
667+
await get_window_rect()
668+
await get_gui_element_rect(selector, timeout=5) # (relative to screen)
669+
await get_title()
670+
await send_keys(selector, text, timeout=5)
671+
await type(selector, text, timeout=5)
672+
await click(selector, timeout=5)
673+
await click_with_offset(selector, x, y, center=False, timeout=5)
674+
await solve_captcha()
675+
await click_captcha() # Same as solve_captcha()
676+
await get_document()
677+
await get_flattened_document()
678+
await get_local_storage()
679+
await set_local_storage(items)
680+
```
681+
682+
--------
683+
615684
### 🐙 <b translate="no">CDP Mode</b> WebElement API / Methods
616685

617686
After finding an element in CDP Mode, you can access `WebElement` methods:
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
<!-- SeleniumBase Docs -->
2+
3+
<h2><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/img/logo6.png" title="SeleniumBase" width="32"></a> Stealthy Playwright 🎭</h2>
4+
5+
🎭 <b translate="no">Stealthy Playwright Mode</b> is a special mode of <b translate="no">SeleniumBase</b> that launches <a href="https://github.com/microsoft/playwright-python">Playwright</a> from <a href="https://github.com/seleniumbase/SeleniumBase/blob/master/examples/cdp_mode/ReadMe.md" translate="no">SeleniumBase CDP Mode</a> in order to grant <b translate="no">Playwright</b> new stealth features, such as the ability to click CAPTCHA checkboxes successfully. <b translate="no">Playwright</b> uses <code>connect_over_cdp()</code> to attach itself to an existing <b translate="no">SeleniumBase</b> session via the <code>remote-debugging-port</code>. From here, the APIs of both frameworks can be used, giving you a hybrid approach that delivers the best of both worlds.
6+
7+
--------
8+
9+
### 🎭 Getting started with <b translate="no">Stealthy Playwright Mode</b>:
10+
11+
If `playwright` isn't already installed, then install it first:
12+
13+
```zsh
14+
pip install playwright
15+
```
16+
17+
Stealthy Playwright Mode comes in 3 formats:
18+
1. `sb_cdp` sync format
19+
2. `SB` nested sync format
20+
3. `cdp_driver` async format
21+
22+
23+
#### `sb_cdp` sync format (minimal boilerplate):
24+
25+
```python
26+
from playwright.sync_api import sync_playwright
27+
from seleniumbase import sb_cdp
28+
29+
sb = sb_cdp.Chrome()
30+
endpoint_url = sb.get_endpoint_url()
31+
32+
with sync_playwright() as p:
33+
browser = p.chromium.connect_over_cdp(endpoint_url)
34+
context = browser.contexts[0]
35+
page = context.pages[0]
36+
page.goto("https://example.com")
37+
```
38+
39+
#### `SB` nested sync format (minimal boilerplate):
40+
41+
```python
42+
from playwright.sync_api import sync_playwright
43+
from seleniumbase import SB
44+
45+
with SB(uc=True) as sb:
46+
sb.activate_cdp_mode()
47+
endpoint_url = sb.cdp.get_endpoint_url()
48+
49+
with sync_playwright() as p:
50+
browser = p.chromium.connect_over_cdp(endpoint_url)
51+
context = browser.contexts[0]
52+
page = context.pages[0]
53+
page.goto("https://example.com")
54+
```
55+
56+
#### `cdp_driver` async format (minimal boilerplate):
57+
58+
```python
59+
import asyncio
60+
from seleniumbase import cdp_driver
61+
from playwright.async_api import async_playwright
62+
63+
async def main():
64+
driver = await cdp_driver.start_async()
65+
endpoint_url = driver.get_endpoint_url()
66+
67+
async with async_playwright() as p:
68+
browser = await p.chromium.connect_over_cdp(endpoint_url)
69+
context = browser.contexts[0]
70+
page = context.pages[0]
71+
await page.goto("https://example.com")
72+
73+
if __name__ == "__main__":
74+
loop = asyncio.new_event_loop()
75+
loop.run_until_complete(main())
76+
```
77+
78+
### 🎭 <b translate="no">Stealthy Playwright Mode</b> details:
79+
80+
The `sb_cdp` and `cdp_driver` formats don't use WebDriver at all, meaning that `chromedriver` isn't needed. From these two formats, Stealthy Playwright Mode can call [CDP Mode methods](https://github.com/seleniumbase/SeleniumBase/blob/master/help_docs/cdp_mode_methods.md) and Playwright methods.
81+
82+
The `SB()` format requires WebDriver, therefore `chromedriver` will be downloaded (as `uc_driver`) if the driver isn't already present on the local machine. The `SB()` format has access to Selenium WebDriver methods via [the SeleniumBase API](https://github.com/seleniumbase/SeleniumBase/blob/master/help_docs/method_summary.md). Using Stealthy Playwright Mode from `SB()` grants access to all the APIs: Selenium, SeleniumBase, [UC Mode](https://github.com/seleniumbase/SeleniumBase/blob/master/help_docs/uc_mode.md), [CDP Mode](https://github.com/seleniumbase/SeleniumBase/blob/master/examples/cdp_mode/ReadMe.md), and Playwright.
83+
84+
In the sync formats, `get_endpoint_url()` also applies `nest-asyncio` so that nested event loops are allowed. (Python doesn't allow nested event loops by default.) Without this, you'd get the error: `"Cannot run the event loop while another loop is running"` when calling CDP Mode methods (such as `solve_captcha()`) from within the Playwright context manager. This `nest-asyncio` call is done behind the scenes so that users don't need to handle this on their own.
85+
86+
### 🎭 <b translate="no">Stealthy Playwright Mode</b> examples:
87+
88+
Here's an example that queries Microsoft Copilot:
89+
90+
```python
91+
from playwright.sync_api import sync_playwright
92+
from seleniumbase import sb_cdp
93+
94+
sb = sb_cdp.Chrome()
95+
endpoint_url = sb.get_endpoint_url()
96+
97+
with sync_playwright() as p:
98+
browser = p.chromium.connect_over_cdp(endpoint_url)
99+
context = browser.contexts[0]
100+
page = context.pages[0]
101+
page.goto("https://copilot.microsoft.com")
102+
page.wait_for_selector("textarea#userInput")
103+
sb.sleep(1)
104+
query = "Playwright Python connect_over_cdp() sync example"
105+
page.fill("textarea#userInput", query)
106+
page.click('button[data-testid="submit-button"]')
107+
sb.sleep(3)
108+
sb.solve_captcha()
109+
page.wait_for_selector('button[data-testid*="-thumbs-up"]')
110+
sb.sleep(4)
111+
page.click('button[data-testid*="scroll-to-bottom"]')
112+
sb.sleep(3)
113+
chat_results = '[data-testid="highlighted-chats"]'
114+
result = page.locator(chat_results).inner_text()
115+
print(result.replace("\n\n", " \n"))
116+
```
117+
118+
Here's an example that solves the Bing CAPTCHA:
119+
120+
```python
121+
from playwright.sync_api import sync_playwright
122+
from seleniumbase import sb_cdp
123+
124+
sb = sb_cdp.Chrome(locale="en")
125+
endpoint_url = sb.get_endpoint_url()
126+
127+
with sync_playwright() as p:
128+
browser = p.chromium.connect_over_cdp(endpoint_url)
129+
context = browser.contexts[0]
130+
page = context.pages[0]
131+
page.goto("https://www.bing.com/turing/captcha/challenge")
132+
sb.sleep(3)
133+
sb.solve_captcha()
134+
sb.sleep(3)
135+
```
136+
137+
For more examples, see [examples/cdp_mode/playwright](https://github.com/seleniumbase/SeleniumBase/tree/master/examples/cdp_mode/playwright).
138+
139+
--------
140+
141+
<a href="https://github.com/seleniumbase/SeleniumBase"><img src="https://seleniumbase.github.io/img/logo6.png" alt="SeleniumBase" title="SeleniumBase" width="100" /></a><img src="https://seleniumbase.github.io/other/playwright_logo.png" alt="Playwright" title="Playwright" height="100">

examples/cdp_mode/playwright/__init__.py

Whitespace-only changes.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import asyncio
2+
from playwright.async_api import async_playwright
3+
from seleniumbase import cdp_driver
4+
5+
6+
async def main():
7+
driver = await cdp_driver.start_async()
8+
endpoint_url = driver.get_endpoint_url()
9+
10+
async with async_playwright() as p:
11+
browser = await p.chromium.connect_over_cdp(endpoint_url)
12+
context = browser.contexts[0]
13+
page = context.pages[0]
14+
await page.goto("https://seleniumbase.io/simple/login")
15+
await page.fill("#username", "demo_user")
16+
await page.fill("#password", "secret_pass")
17+
await page.click("#log-in")
18+
await page.wait_for_selector("h1")
19+
await driver.sleep(1)
20+
21+
22+
if __name__ == "__main__":
23+
loop = asyncio.new_event_loop()
24+
loop.run_until_complete(main())
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from playwright.sync_api import sync_playwright
2+
from seleniumbase import SB
3+
4+
with SB(uc=True) as sb:
5+
sb.activate_cdp_mode()
6+
endpoint_url = sb.cdp.get_endpoint_url()
7+
8+
with sync_playwright() as p:
9+
browser = p.chromium.connect_over_cdp(endpoint_url)
10+
context = browser.contexts[0]
11+
page = context.pages[0]
12+
page.goto("https://seleniumbase.io/simple/login")
13+
page.fill("#username", "demo_user")
14+
page.fill("#password", "secret_pass")
15+
page.click("#log-in")
16+
page.wait_for_selector("h1")
17+
sb.sleep(1)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from playwright.sync_api import sync_playwright
2+
from seleniumbase import sb_cdp
3+
4+
sb = sb_cdp.Chrome()
5+
endpoint_url = sb.get_endpoint_url()
6+
7+
with sync_playwright() as p:
8+
browser = p.chromium.connect_over_cdp(endpoint_url)
9+
context = browser.contexts[0]
10+
page = context.pages[0]
11+
page.goto("https://seleniumbase.io/simple/login")
12+
page.fill("#username", "demo_user")
13+
page.fill("#password", "secret_pass")
14+
page.click("#log-in")
15+
page.wait_for_selector("h1")
16+
sb.sleep(1)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import asyncio
2+
from playwright.async_api import async_playwright
3+
from seleniumbase import cdp_driver
4+
5+
6+
async def main():
7+
driver = await cdp_driver.start_async(locale="en")
8+
endpoint_url = driver.get_endpoint_url()
9+
10+
async with async_playwright() as p:
11+
browser = await p.chromium.connect_over_cdp(endpoint_url)
12+
context = browser.contexts[0]
13+
page = context.pages[0]
14+
await page.goto("https://www.bing.com/turing/captcha/challenge")
15+
await driver.sleep(3)
16+
await driver.solve_captcha()
17+
await driver.sleep(3)
18+
19+
20+
if __name__ == "__main__":
21+
loop = asyncio.new_event_loop()
22+
loop.run_until_complete(main())
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from playwright.sync_api import sync_playwright
2+
from seleniumbase import SB
3+
4+
with SB(uc=True, locale="en") as sb:
5+
sb.activate_cdp_mode()
6+
endpoint_url = sb.cdp.get_endpoint_url()
7+
8+
with sync_playwright() as p:
9+
browser = p.chromium.connect_over_cdp(endpoint_url)
10+
context = browser.contexts[0]
11+
page = context.pages[0]
12+
page.goto("https://www.bing.com/turing/captcha/challenge")
13+
sb.sleep(3)
14+
sb.solve_captcha()
15+
sb.sleep(3)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from playwright.sync_api import sync_playwright
2+
from seleniumbase import sb_cdp
3+
4+
sb = sb_cdp.Chrome(locale="en")
5+
endpoint_url = sb.get_endpoint_url()
6+
7+
with sync_playwright() as p:
8+
browser = p.chromium.connect_over_cdp(endpoint_url)
9+
context = browser.contexts[0]
10+
page = context.pages[0]
11+
page.goto("https://www.bing.com/turing/captcha/challenge")
12+
sb.sleep(3)
13+
sb.solve_captcha()
14+
sb.sleep(3)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import asyncio
2+
from playwright.async_api import async_playwright
3+
from seleniumbase import cdp_driver
4+
5+
6+
async def main():
7+
driver = await cdp_driver.start_async()
8+
endpoint_url = driver.get_endpoint_url()
9+
10+
async with async_playwright() as p:
11+
browser = await p.chromium.connect_over_cdp(endpoint_url)
12+
context = browser.contexts[0]
13+
page = context.pages[0]
14+
await page.goto("https://copilot.microsoft.com")
15+
await page.wait_for_selector("textarea#userInput")
16+
await driver.sleep(1)
17+
query = "Playwright Python connect_over_cdp() sync example"
18+
await page.fill("textarea#userInput", query)
19+
await page.click('button[data-testid="submit-button"]')
20+
await driver.sleep(3)
21+
await driver.solve_captcha()
22+
await page.wait_for_selector('button[data-testid*="-thumbs-up"]')
23+
await driver.sleep(4)
24+
await page.click('button[data-testid*="scroll-to-bottom"]')
25+
await driver.sleep(3)
26+
chat_results = '[data-testid="highlighted-chats"]'
27+
result = await page.locator(chat_results).inner_text()
28+
print(result.replace("\n\n", " \n"))
29+
30+
31+
if __name__ == "__main__":
32+
loop = asyncio.new_event_loop()
33+
loop.run_until_complete(main())

0 commit comments

Comments
 (0)