123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347 |
- from selenium.webdriver.chrome.options import Options
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.common.action_chains import ActionChains
- from selenium import webdriver
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- from selenium.common.exceptions import NoSuchElementException
- from selenium.common.exceptions import TimeoutException
- from selenium.common.exceptions import StaleElementReferenceException, InvalidSelectorException
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- from selenium.webdriver.support.ui import Select
- from selenium.webdriver import ActionChains
- from selenium.webdriver.common.by import By
- import sys
- desired_capabilities = DesiredCapabilities.CHROME
- desired_capabilities["pageLoadStrategy"] = "none"
- class MyChrome(webdriver.Chrome, webdriver.Remote):
- def __init__(self, mode='local_driver', *args, **kwargs):
- self.iframe_env = False # 现在的环境是root还是iframe
- self.mode = mode
- if mode == "local_driver":
- webdriver.Chrome.__init__(self, *args, **kwargs)
- elif mode == "remote_driver":
- webdriver.Remote.__init__(self, *args, **kwargs)
- # super().__init__(*args, **kwargs) # 调用父类的 __init__
- # def find_element(self, by=By.ID, value=None, iframe=False):
- # # 在这里改变查找元素的行为
- # if self.iframe_env:
- # super().switch_to.default_content()
- # self.iframe_env = False
- # if iframe:
- # # 获取所有的 iframe
- # try:
- # iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
- # except Exception as e:
- # print(e)
- # find_element = False
- # # 遍历所有的 iframe 并查找里面的元素
- # for iframe in iframes:
- # # 切换到 iframe
- # super().switch_to.default_content()
- # super().switch_to.frame(iframe)
- # self.iframe_env = True
- # try:
- # # 在 iframe 中查找元素
- # # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
- # element = super().find_element(by=by, value=value)
- # find_element = True
- # except NoSuchElementException as e:
- # print(f"No such element found in the iframe: {str(e)}")
- # except Exception as e:
- # print(f"Exception: {str(e)}")
- # # 完成操作后切回主文档
- # # super().switch_to.default_content()
- # if find_element:
- # return element
- # if not find_element:
- # raise NoSuchElementException
- # else:
- # return super().find_element(by=by, value=value)
- def find_element_recursive(self, by, value, frames):
- for frame in frames:
- try:
- try:
- self.switch_to.frame(frame)
- except StaleElementReferenceException:
- # If the frame has been refreshed, we need to switch to the parent frame first,
- self.switch_to.parent_frame()
- self.switch_to.frame(frame)
- try:
- # !!! Attempt to find the element in the current frame, not the context (iframe environment will not change to default), therefore we use super().find_element instead of self.find_element
- element = super(MyChrome, self).find_element(by=by, value=value)
- return element
- except NoSuchElementException:
- # Recurse into nested iframes
- nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
- if nested_frames:
- element = self.find_element_recursive(by, value, nested_frames)
- if element:
- return element
- except Exception as e:
- print(f"Exception while processing frame: {e}")
- raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
- def find_element(self, by=By.ID, value=None, iframe=False):
- self.switch_to.default_content() # Switch back to the main document
- self.iframe_env = False
- if iframe:
- frames = self.find_elements(By.CSS_SELECTOR, "iframe")
- if not frames:
- raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
- self.iframe_env = True
- element = self.find_element_recursive(by, value, frames)
- else:
- # Find element in the main document as normal
- element = super(MyChrome, self).find_element(by=by, value=value)
- return element
- # def find_elements(self, by=By.ID, value=None, iframe=False):
- # # 在这里改变查找元素的行为
- # if self.iframe_env:
- # super().switch_to.default_content()
- # self.iframe_env = False
- # if iframe:
- # # 获取所有的 iframe
- # iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
- # find_element = False
- # # 遍历所有的 iframe 并找到里面的元素
- # for iframe in iframes:
- # # 切换到 iframe
- # try:
- # super().switch_to.default_content()
- # super().switch_to.frame(iframe)
- # self.iframe_env = True
- # # 在 iframe 中查找元素
- # # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
- # elements = super().find_elements(by=by, value=value)
- # if len(elements) > 0:
- # find_element = True
- # # 完成操作后切回主文档
- # # super().switch_to.default_content()
- # if find_element:
- # return elements
- # except NoSuchElementException as e:
- # print(f"No such element found in the iframe: {str(e)}")
- # except Exception as e:
- # print(f"Exception: {str(e)}")
- # if not find_element:
- # raise NoSuchElementException
- # else:
- # return super().find_elements(by=by, value=value)
- def find_elements_recursive(self, by, value, frames):
- for frame in frames:
- try:
- try:
- self.switch_to.frame(frame)
- except StaleElementReferenceException:
- # If the frame has been refreshed, we need to switch to the parent frame first,
- self.switch_to.parent_frame()
- self.switch_to.frame(frame)
- # Directly find elements in the current frame
- elements = super(MyChrome, self).find_elements(by=by, value=value)
- if elements:
- return elements
- # Recursively search for elements in nested iframes
- nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
- if nested_frames:
- elements = self.find_elements_recursive(by, value, nested_frames)
- if elements:
- return elements
- except Exception as e:
- print(f"Exception while processing frame: {e}")
- raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
- def find_elements(self, by=By.ID, value=None, iframe=False):
- self.switch_to.default_content() # Switch back to the main document
- self.iframe_env = False
- if iframe:
- frames = self.find_elements(By.CSS_SELECTOR, "iframe")
- if not frames:
- return [] # Return an empty list if no iframes are found
- self.iframe_env = True
- elements = self.find_elements_recursive(by, value, frames)
- else:
- # Find elements in the main document as normal
- elements = super(MyChrome, self).find_elements(by=by, value=value)
- return elements
- class MyEdge(webdriver.Ie):
- def __init__(self, *args, **kwargs):
- self.iframe_env = False # 现在的环境是root还是iframe
- super().__init__(*args, **kwargs) # 调用父类的 __init__
- def find_element_recursive(self, by, value, frames):
- for frame in frames:
- try:
- try:
- self.switch_to.frame(frame)
- except StaleElementReferenceException:
- # If the frame has been refreshed, we need to switch to the parent frame first,
- self.switch_to.parent_frame()
- self.switch_to.frame(frame)
- try:
- # !!! Attempt to find the element in the current frame, not the context (iframe environment will not change to default), therefore we use super().find_element instead of self.find_element
- element = super(MyEdge, self).find_element(by=by, value=value)
- return element
- except NoSuchElementException:
- # Recurse into nested iframes
- nested_frames = super(MyEdge, self).find_elements(By.CSS_SELECTOR, "iframe")
- if nested_frames:
- element = self.find_element_recursive(by, value, nested_frames)
- if element:
- return element
- except Exception as e:
- print(f"Exception while processing frame: {e}")
- raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
- def find_element(self, by=By.ID, value=None, iframe=False):
- self.switch_to.default_content() # Switch back to the main document
- self.iframe_env = False
- if iframe:
- frames = self.find_elements(By.CSS_SELECTOR, "iframe")
- if not frames:
- raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
- self.iframe_env = True
- element = self.find_element_recursive(by, value, frames)
- else:
- # Find element in the main document as normal
- element = super(MyEdge, self).find_element(by=by, value=value)
- return element
- def find_elements_recursive(self, by, value, frames):
- for frame in frames:
- try:
- try:
- self.switch_to.frame(frame)
- except StaleElementReferenceException:
- # If the frame has been refreshed, we need to switch to the parent frame first,
- self.switch_to.parent_frame()
- self.switch_to.frame(frame)
- # Directly find elements in the current frame
- elements = super(MyEdge, self).find_elements(by=by, value=value)
- if elements:
- return elements
- # Recursively search for elements in nested iframes
- nested_frames = super(MyEdge, self).find_elements(By.CSS_SELECTOR, "iframe")
- if nested_frames:
- elements = self.find_elements_recursive(by, value, nested_frames)
- if elements:
- return elements
- except Exception as e:
- print(f"Exception while processing frame: {e}")
- raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
- def find_elements(self, by=By.ID, value=None, iframe=False):
- self.switch_to.default_content() # Switch back to the main document
- self.iframe_env = False
- if iframe:
- frames = self.find_elements(By.CSS_SELECTOR, "iframe")
- if not frames:
- return [] # Return an empty list if no iframes are found
- self.iframe_env = True
- elements = self.find_elements_recursive(by, value, frames)
- else:
- # Find elements in the main document as normal
- elements = super(MyEdge, self).find_elements(by=by, value=value)
- return elements
- # MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
- if sys.platform != "darwin":
- ES = True
- if ES: # 用自己写的ES版本
- import undetected_chromedriver_ES as uc
- else:
- import undetected_chromedriver as uc
- class MyUCChrome(uc.Chrome):
- def __init__(self, *args, **kwargs):
- self.iframe_env = False # 现在的环境是root还是iframe
- super().__init__(*args, **kwargs) # 调用父类的 __init__
- def find_element(self, by=By.ID, value=None, iframe=False):
- # 在这里改变查找元素的行为
- if self.iframe_env:
- super().switch_to.default_content()
- self.iframe_env = False
- if iframe:
- # 获取所有的 iframe
- try:
- iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
- except Exception as e:
- print(e)
- find_element = False
- # 遍历所有的 iframe 并找到里面的元素
- for iframe in iframes:
- # 切换到 iframe
- super().switch_to.default_content()
- super().switch_to.frame(iframe)
- self.iframe_env = True
- try:
- # 在 iframe 中查找元素
- # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
- element = super().find_element(by=by, value=value)
- find_element = True
- except NoSuchElementException as e:
- print(f"No such element found in the iframe: {str(e)}")
- except Exception as e:
- print(f"Exception: {str(e)}")
- # 完成操作后切回主文档
- # super().switch_to.default_content()
- if find_element:
- return element
- if not find_element:
- raise NoSuchElementException
- else:
- return super().find_element(by=by, value=value)
- def find_elements(self, by=By.ID, value=None, iframe=False):
- # 在这里改变查找元素的行为
- if self.iframe_env:
- super().switch_to.default_content()
- self.iframe_env = False
- if iframe:
- # 获取所有的 iframe
- iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
- find_element = False
- # 遍历所有的 iframe 并查找里面的元素
- for iframe in iframes:
- # 切换到 iframe
- try:
- super().switch_to.default_content()
- super().switch_to.frame(iframe)
- self.iframe_env = True
- # 在 iframe 中查找元素
- # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
- elements = super().find_elements(by=by, value=value)
- if len(elements) > 0:
- find_element = True
- # 完成操作后切回主文档
- # super().switch_to.default_content()
- if find_element:
- return elements
- except NoSuchElementException as e:
- print(f"No such element found in the iframe: {str(e)}")
- except Exception as e:
- print(f"Exception: {str(e)}")
- if not find_element:
- raise NoSuchElementException
- else:
- return super().find_elements(by=by, value=value)
|