selenium 爬虫时常常会遇到 stale element reference: element is not attached to the page document,加了显式等待感觉没什么用

2022-08-09

翻页输出东方财富利润表时,有时候可以输出三份 DataFrame,有时候只有两份,甚至直接报错 element is not attached to the page document。加了显式等待,感觉好像没什么用。(另外,是不是在 if 中套 test 会好一点?)求帮助,谢谢。

from selenium import webdriver
import pandas as pd
import numpy as np
from selenium.webdriver.common.by import By
from lxml import etree
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

stock = 'SZ300278'
url='http://f10.eastmoney.com/f10_v2/FinanceAnalysis.aspx?code=%s' %stock
browser = webdriver.Chrome()
browser.get(url)
wait=WebDriverWait(browser, 10)
wait.until(EC.element_to_be_clickable((By.XPATH,'//div[@class="main"]/descendant::ul[@id="lrb_ul"]/li[2]')))
button=browser.find_element(By.XPATH,'//div[@class="main"]/descendant::ul[@id="lrb_ul"]/li[2]')
button.click()
wait.until(EC.element_to_be_clickable((By.ID,'lrb_next')))
next=browser.find_element_by_id('lrb_next')

content_wait=wait.until(EC.presence_of_element_located((By.ID,'report_lrb')))
element=browser.find_element_by_id('report_lrb')
th_content = element.find_elements_by_tag_name('th')
td_content = element.find_elements_by_tag_name('td')
list=[]
for th in th_content:
    list.append(th.text)
for td in td_content:
    list.append(td.text)
col = len(element.find_elements_by_css_selector('tr:nth-child(1) th'))
list = [list[i:i + col] for i in range(0, len(list), col)]
for x in range(list.count(['', '', '', '', '', ''])):
    list.remove(['', '', '', '', '', ''])
lrb_table = pd.DataFrame(list)
print(lrb_table)

while next.get_attribute('style')=='display: inline;':
    try:
        wait.until(EC.element_to_be_clickable((By.ID,'lrb_next')))
        time.sleep(1)
        next.click()
        next_wait=wait.until(EC.element_to_be_clickable((By.ID,'lrb_next')))
        content_wait=wait.until(EC.presence_of_element_located((By.ID,'report_lrb')))
        content_wait=wait.until(EC.presence_of_element_located((By.ID,'report_lrb')))
        element=browser.find_element_by_id('report_lrb')
        th_content = element.find_elements_by_tag_name('th')
        td_content = element.find_elements_by_tag_name('td')
        list=[]
        for th in th_content:
            list.append(th.text)
        for td in td_content:
            list.append(td.text)
        col = len(element.find_elements_by_css_selector('tr:nth-child(1) th'))
        list = [list[i:i + col] for i in range(0, len(list), col)]
        for x in range(list.count(['', '', '', '', '', ''])):
            list.remove(['', '', '', '', '', ''])
        lrb_table1 = pd.DataFrame(list)
        print(lrb_table1)
    except:
        print('打印完成')

本文地址:https://blog.csdn.net/weixin_49101060/article/details/107160044

《selenium爬虫时常常会遇到stale element reference: element is not attached to the page document加了显式等待感觉没什么用.doc》

下载本文的Word格式文档,以方便收藏与打印。