1. 获取元素的文本方法:inner_text()
# 1. 获取元素的文本方法:inner_text()
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") text = page.locator("h1").inner_text() print("获取的文本:", text) browser.close()
# 通过 inner_text() 方法获取页面中指定元素的文本内容。例如,获取 <h1> 标签的文本。# 2. 获取所有匹配元素的文本方法:all_inner_texts()
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") texts = page.locator("p").all_inner_texts() print("获取的所有文本:", texts) browser.close()
# 使用 all_inner_texts() 获取所有匹配元素的文本内容,例如获取页面中所有 <p> 标签文本。# 3. 获取匹配元素的属性值方法:get_attribute()
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") img_src = page.locator("img").get_attribute("src") print("图片的 src 属性:", img_src) browser.close()
#通过 get_attribute() 获取指定元素的属性值,例如获取 <img> 标签的 src 属性# 4.获取 input 元素的 value 属性值方法:input_value()
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") value = page.locator("#user").input_value() print("输入框的值:", value) browser.close()
# 使用 input_value() 获取输入框的值,例如获取 <input id="user"> 的值。# 5. 获取元素的位置方法:bounding_box()
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") rect = page.locator("#box11").bounding_box() print("元素的位置:", rect) browser.close()
# 通过 bounding_box() 获取元素的位置和尺寸信息,例如获取 <div id="box11"> 的位置。# 6. 统计匹配到的元素个数方法:count()
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") count = page.locator("li").count() print("匹配的元素个数:", count) browser.close()
# 使用 count() 统计页面中匹配到的元素数量,例如统计所有 <li> 标签的数量。# 在自动化测试和数据抓取中,等待机制是确保操作成功的关键。
# Playwright 提供了强大的等待功能,帮助开发者灵活应对各种场景。
# 等待机制Playwright 默认提供了智能等待机制,等待时间为 30 秒(30000 毫秒)。你可以根据需要调整等待时间或使用特定的等待条件。# 7. 修改默认的等待时间方法:set_default_timeout(timeout)
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.set_default_timeout(5000) # 设置默认等待时间为 5000 毫秒 page.goto("https://example.com") browser.close()
# 通过 set_default_timeout() 方法设置全局等待时间,单位为毫秒。例如,将默认等待时间设置为 5000 毫秒。# 8. 等待页面处于某种状态方法:wait_for_load_state(state)
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.wait_for_load_state("load") # 等待页面加载完成 print("页面加载完成!") browser.close()
"""
使用 wait_for_load_state() 等待页面加载状态达到指定条件。支持的状态包括:'load':等待所有资源加载完成。'domcontentloaded':等待 HTML 文档解析完成。'networkidle':等待网络空闲(不推荐使用)。
"""# 9. 等待页面跳转到指定的 URL方法:wait_for_url(url)
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.wait_for_url("**/target.html") # 等待页面跳转到 target.html print("页面跳转完成!") browser.close()
# 通过 wait_for_url() 等待页面跳转到指定的 URL。可以使用通配符 ** 匹配路径。# 10. 等待指定的时间方法:wait_for_timeout(timeout)
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.wait_for_timeout(10000) # 等待 10000 毫秒 print("等待完成!") browser.close()
# 使用 wait_for_timeout() 暂停执行指定时间,单位为毫秒。例如,暂停 10000 毫秒。# 11. 鼠标操作,鼠标点击,方法:page.mouse.click(x, y)
"""
通过 page.mouse.click() 方法模拟鼠标点击操作,指定点击的位置、按钮类型、点击次数和延迟时间。
参数:x:X轴位置y:Y轴位置button:left、right、middleclick_count:点击次数,默认值为 1delay:点击按下到释放的时间(以毫秒为单位)
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.mouse.click(100, 200, button="left", click_count=1, delay=100) print("鼠标点击完成!") browser.close()# 12. 按下鼠标方法:page.mouse.down()
"""
参数:x:X轴位置y:Y轴位置button:left、right、middle
通过 page.mouse.down() 方法模拟鼠标按下操作,通常需要先移动鼠标到指定位置。"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.mouse.move(100, 200) page.mouse.down(button="left") print("鼠标按下完成!") browser.close()# 13. 移动鼠标方法:page.mouse.move()
"""
参数:x:X轴位置y:Y轴位置
通过 page.mouse.move() 方法移动鼠标到指定位置。
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.mouse.move(100, 200) print("鼠标移动完成!") browser.close()# 14. 释放鼠标方法:page.mouse.up()
"""
参数:button:left、right、middleclick_count:点击次数,默认值为 1
通过 page.mouse.up() 方法释放鼠标按钮,通常与 page.mouse.down() 配合使用。
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.mouse.move(100, 200) page.mouse.down(button="left") page.mouse.up(button="left") print("鼠标释放完成!") browser.close()# 15. 元素拖拽综合方法方法:page.drag_and_drop(source, target)
"""
参数:source:拖拽元素的定位表达式target:目标元素的定位表达式
通过 page.drag_and_drop() 方法实现元素的拖拽操作,指定拖拽的源元素和目标元素。
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.drag_and_drop('//*[@id="box22"]', '//*[@id="box11"]') print("元素拖拽完成!") browser.close()# 16. 鼠标滚轮滚动 方法:page.mouse.wheel(delta_x, delta_y)
"""
参数:delta_x:要水平滚动的像素delta_y:要垂直滚动的像素
通过 page.mouse.wheel() 方法模拟鼠标滚轮滚动,指定水平和垂直滚动的像素值。
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://example.com") page.mouse.wheel(0, 100) # 向下滚动 100 像素 print("鼠标滚动完成!") browser.close()# 17 使用 Playwright 实现拖拽操作案例代码
"""
启动浏览器和页面:使用 launch() 启动浏览器,并通过 new_page() 创建一个新页面。
加载页面:使用 goto() 加载本地 HTML 文件。
等待元素加载:使用 wait_for_selector() 确保拖拽和目标元素已经加载完成。
执行拖拽操作:使用 drag_and_drop() 方法实现拖拽操作,指定拖拽的源元素和目标元素。
关闭浏览器:完成操作后关闭浏览器。Playwright 提供了强大的鼠标操作功能,尤其是 drag_and_drop 方法,可以轻松实现复杂的拖拽操作
"""
from playwright.sync_api import sync_playwright
def run_drag_and_drop(): with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("file:///path/to/your/drag_and_drop.html") # 替换为你的本地文件路径 page.wait_for_selector("#box22") page.wait_for_selector("#box11") # 拖拽操作 page.drag_and_drop("#box22", "#box11") print("拖拽操作完成!") browser.close()
# 调用函数
run_drag_and_drop()# 18 按下键盘按键方法:keyboard.down(key)
"""
通过 keyboard.down() 方法模拟按下指定按键,例如按下 Enter 键。
参数:key:要按下的键的名称或要生成的字符
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://www.toptal.com/developers/keycode") page.wait_for_timeout(2000) page.keyboard.down("Enter") # 按下 Enter 键 page.wait_for_timeout(2000) browser.close()# 19 松开键盘 按键方法:keyboard.up(key)
"""
通过 keyboard.press() 方法模拟按下并松开指定按键,例如按下并松开 A 键。
参数:key:要松开的键的名称或要生成的字符
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://www.toptal.com/developers/keycode") page.wait_for_timeout(2000) page.keyboard.press("A") # 按下并松开 A 键 page.wait_for_timeout(2000) browser.close()# 20 使用键盘进行文本输入方法:keyboard.type(text)
"""
通过 keyboard.type() 方法在焦点元素中输入文本
参数:text:要输入到焦点元素中的文本
"""
from playwright.sync_api import sync_playwright
with sync_playwright() as p: browser = p.chromium.launch(headless=False) page = browser.new_page() page.goto("https://www.baidu.com") page.wait_for_timeout(2000) page.locator("#kw").focus() # 定位到百度搜索框 page.wait_for_timeout(2000) page.keyboard.type("1234567890") # 输入文本 page.wait_for_timeout(2000) browser.close()