myChrome.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. from selenium.webdriver.chrome.options import Options
  2. from selenium.webdriver.common.keys import Keys
  3. from selenium.webdriver.common.action_chains import ActionChains
  4. from selenium import webdriver
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.support import expected_conditions as EC
  7. from selenium.webdriver.common.by import By
  8. from selenium.common.exceptions import NoSuchElementException
  9. from selenium.common.exceptions import TimeoutException
  10. from selenium.common.exceptions import StaleElementReferenceException, InvalidSelectorException
  11. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  12. from selenium.webdriver.support.ui import Select
  13. from selenium.webdriver import ActionChains
  14. from selenium.webdriver.common.by import By
  15. import sys
  16. desired_capabilities = DesiredCapabilities.CHROME
  17. desired_capabilities["pageLoadStrategy"] = "none"
  18. class MyChrome(webdriver.Chrome, webdriver.Remote):
  19. def __init__(self, mode='local_driver', *args, **kwargs):
  20. self.iframe_env = False # 现在的环境是root还是iframe
  21. self.mode = mode
  22. if mode == "local_driver":
  23. webdriver.Chrome.__init__(self, *args, **kwargs)
  24. elif mode == "remote_driver":
  25. webdriver.Remote.__init__(self, *args, **kwargs)
  26. # super().__init__(*args, **kwargs) # 调用父类的 __init__
  27. # def find_element(self, by=By.ID, value=None, iframe=False):
  28. # # 在这里改变查找元素的行为
  29. # if self.iframe_env:
  30. # super().switch_to.default_content()
  31. # self.iframe_env = False
  32. # if iframe:
  33. # # 获取所有的 iframe
  34. # try:
  35. # iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
  36. # except Exception as e:
  37. # print(e)
  38. # find_element = False
  39. # # 遍历所有的 iframe 并查找里面的元素
  40. # for iframe in iframes:
  41. # # 切换到 iframe
  42. # super().switch_to.default_content()
  43. # super().switch_to.frame(iframe)
  44. # self.iframe_env = True
  45. # try:
  46. # # 在 iframe 中查找元素
  47. # # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
  48. # element = super().find_element(by=by, value=value)
  49. # find_element = True
  50. # except NoSuchElementException as e:
  51. # print(f"No such element found in the iframe: {str(e)}")
  52. # except Exception as e:
  53. # print(f"Exception: {str(e)}")
  54. # # 完成操作后切回主文档
  55. # # super().switch_to.default_content()
  56. # if find_element:
  57. # return element
  58. # if not find_element:
  59. # raise NoSuchElementException
  60. # else:
  61. # return super().find_element(by=by, value=value)
  62. def find_element_recursive(self, by, value, frames):
  63. for frame in frames:
  64. try:
  65. try:
  66. self.switch_to.frame(frame)
  67. except StaleElementReferenceException:
  68. # If the frame has been refreshed, we need to switch to the parent frame first,
  69. self.switch_to.parent_frame()
  70. self.switch_to.frame(frame)
  71. try:
  72. # !!! Attempt to find the element in the current frame, not the context (iframe environment will not change to default), therefore we use super().find_element instead of self.find_element
  73. element = super(MyChrome, self).find_element(by=by, value=value)
  74. return element
  75. except NoSuchElementException:
  76. # Recurse into nested iframes
  77. nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
  78. if nested_frames:
  79. element = self.find_element_recursive(by, value, nested_frames)
  80. if element:
  81. return element
  82. except Exception as e:
  83. print(f"Exception while processing frame: {e}")
  84. raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
  85. def find_element(self, by=By.ID, value=None, iframe=False):
  86. self.switch_to.default_content() # Switch back to the main document
  87. self.iframe_env = False
  88. if iframe:
  89. frames = self.find_elements(By.CSS_SELECTOR, "iframe")
  90. if not frames:
  91. raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
  92. self.iframe_env = True
  93. element = self.find_element_recursive(by, value, frames)
  94. else:
  95. # Find element in the main document as normal
  96. element = super(MyChrome, self).find_element(by=by, value=value)
  97. return element
  98. # def find_elements(self, by=By.ID, value=None, iframe=False):
  99. # # 在这里改变查找元素的行为
  100. # if self.iframe_env:
  101. # super().switch_to.default_content()
  102. # self.iframe_env = False
  103. # if iframe:
  104. # # 获取所有的 iframe
  105. # iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
  106. # find_element = False
  107. # # 遍历所有的 iframe 并找到里面的元素
  108. # for iframe in iframes:
  109. # # 切换到 iframe
  110. # try:
  111. # super().switch_to.default_content()
  112. # super().switch_to.frame(iframe)
  113. # self.iframe_env = True
  114. # # 在 iframe 中查找元素
  115. # # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
  116. # elements = super().find_elements(by=by, value=value)
  117. # if len(elements) > 0:
  118. # find_element = True
  119. # # 完成操作后切回主文档
  120. # # super().switch_to.default_content()
  121. # if find_element:
  122. # return elements
  123. # except NoSuchElementException as e:
  124. # print(f"No such element found in the iframe: {str(e)}")
  125. # except Exception as e:
  126. # print(f"Exception: {str(e)}")
  127. # if not find_element:
  128. # raise NoSuchElementException
  129. # else:
  130. # return super().find_elements(by=by, value=value)
  131. def find_elements_recursive(self, by, value, frames):
  132. for frame in frames:
  133. try:
  134. try:
  135. self.switch_to.frame(frame)
  136. except StaleElementReferenceException:
  137. # If the frame has been refreshed, we need to switch to the parent frame first,
  138. self.switch_to.parent_frame()
  139. self.switch_to.frame(frame)
  140. # Directly find elements in the current frame
  141. elements = super(MyChrome, self).find_elements(by=by, value=value)
  142. if elements:
  143. return elements
  144. # Recursively search for elements in nested iframes
  145. nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
  146. if nested_frames:
  147. elements = self.find_elements_recursive(by, value, nested_frames)
  148. if elements:
  149. return elements
  150. except Exception as e:
  151. print(f"Exception while processing frame: {e}")
  152. raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
  153. def find_elements(self, by=By.ID, value=None, iframe=False):
  154. self.switch_to.default_content() # Switch back to the main document
  155. self.iframe_env = False
  156. if iframe:
  157. frames = self.find_elements(By.CSS_SELECTOR, "iframe")
  158. if not frames:
  159. return [] # Return an empty list if no iframes are found
  160. self.iframe_env = True
  161. elements = self.find_elements_recursive(by, value, frames)
  162. else:
  163. # Find elements in the main document as normal
  164. elements = super(MyChrome, self).find_elements(by=by, value=value)
  165. return elements
  166. class MyEdge(webdriver.Ie):
  167. def __init__(self, *args, **kwargs):
  168. self.iframe_env = False # 现在的环境是root还是iframe
  169. super().__init__(*args, **kwargs) # 调用父类的 __init__
  170. def find_element_recursive(self, by, value, frames):
  171. for frame in frames:
  172. try:
  173. try:
  174. self.switch_to.frame(frame)
  175. except StaleElementReferenceException:
  176. # If the frame has been refreshed, we need to switch to the parent frame first,
  177. self.switch_to.parent_frame()
  178. self.switch_to.frame(frame)
  179. try:
  180. # !!! Attempt to find the element in the current frame, not the context (iframe environment will not change to default), therefore we use super().find_element instead of self.find_element
  181. element = super(MyEdge, self).find_element(by=by, value=value)
  182. return element
  183. except NoSuchElementException:
  184. # Recurse into nested iframes
  185. nested_frames = super(MyEdge, self).find_elements(By.CSS_SELECTOR, "iframe")
  186. if nested_frames:
  187. element = self.find_element_recursive(by, value, nested_frames)
  188. if element:
  189. return element
  190. except Exception as e:
  191. print(f"Exception while processing frame: {e}")
  192. raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
  193. def find_element(self, by=By.ID, value=None, iframe=False):
  194. self.switch_to.default_content() # Switch back to the main document
  195. self.iframe_env = False
  196. if iframe:
  197. frames = self.find_elements(By.CSS_SELECTOR, "iframe")
  198. if not frames:
  199. raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
  200. self.iframe_env = True
  201. element = self.find_element_recursive(by, value, frames)
  202. else:
  203. # Find element in the main document as normal
  204. element = super(MyEdge, self).find_element(by=by, value=value)
  205. return element
  206. def find_elements_recursive(self, by, value, frames):
  207. for frame in frames:
  208. try:
  209. try:
  210. self.switch_to.frame(frame)
  211. except StaleElementReferenceException:
  212. # If the frame has been refreshed, we need to switch to the parent frame first,
  213. self.switch_to.parent_frame()
  214. self.switch_to.frame(frame)
  215. # Directly find elements in the current frame
  216. elements = super(MyEdge, self).find_elements(by=by, value=value)
  217. if elements:
  218. return elements
  219. # Recursively search for elements in nested iframes
  220. nested_frames = super(MyEdge, self).find_elements(By.CSS_SELECTOR, "iframe")
  221. if nested_frames:
  222. elements = self.find_elements_recursive(by, value, nested_frames)
  223. if elements:
  224. return elements
  225. except Exception as e:
  226. print(f"Exception while processing frame: {e}")
  227. raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
  228. def find_elements(self, by=By.ID, value=None, iframe=False):
  229. self.switch_to.default_content() # Switch back to the main document
  230. self.iframe_env = False
  231. if iframe:
  232. frames = self.find_elements(By.CSS_SELECTOR, "iframe")
  233. if not frames:
  234. return [] # Return an empty list if no iframes are found
  235. self.iframe_env = True
  236. elements = self.find_elements_recursive(by, value, frames)
  237. else:
  238. # Find elements in the main document as normal
  239. elements = super(MyEdge, self).find_elements(by=by, value=value)
  240. return elements
  241. # MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
  242. if sys.platform != "darwin":
  243. ES = True
  244. if ES: # 用自己写的ES版本
  245. import undetected_chromedriver_ES as uc
  246. else:
  247. import undetected_chromedriver as uc
  248. class MyUCChrome(uc.Chrome):
  249. def __init__(self, *args, **kwargs):
  250. self.iframe_env = False # 现在的环境是root还是iframe
  251. super().__init__(*args, **kwargs) # 调用父类的 __init__
  252. def find_element(self, by=By.ID, value=None, iframe=False):
  253. # 在这里改变查找元素的行为
  254. if self.iframe_env:
  255. super().switch_to.default_content()
  256. self.iframe_env = False
  257. if iframe:
  258. # 获取所有的 iframe
  259. try:
  260. iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
  261. except Exception as e:
  262. print(e)
  263. find_element = False
  264. # 遍历所有的 iframe 并找到里面的元素
  265. for iframe in iframes:
  266. # 切换到 iframe
  267. super().switch_to.default_content()
  268. super().switch_to.frame(iframe)
  269. self.iframe_env = True
  270. try:
  271. # 在 iframe 中查找元素
  272. # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
  273. element = super().find_element(by=by, value=value)
  274. find_element = True
  275. except NoSuchElementException as e:
  276. print(f"No such element found in the iframe: {str(e)}")
  277. except Exception as e:
  278. print(f"Exception: {str(e)}")
  279. # 完成操作后切回主文档
  280. # super().switch_to.default_content()
  281. if find_element:
  282. return element
  283. if not find_element:
  284. raise NoSuchElementException
  285. else:
  286. return super().find_element(by=by, value=value)
  287. def find_elements(self, by=By.ID, value=None, iframe=False):
  288. # 在这里改变查找元素的行为
  289. if self.iframe_env:
  290. super().switch_to.default_content()
  291. self.iframe_env = False
  292. if iframe:
  293. # 获取所有的 iframe
  294. iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
  295. find_element = False
  296. # 遍历所有的 iframe 并查找里面的元素
  297. for iframe in iframes:
  298. # 切换到 iframe
  299. try:
  300. super().switch_to.default_content()
  301. super().switch_to.frame(iframe)
  302. self.iframe_env = True
  303. # 在 iframe 中查找元素
  304. # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
  305. elements = super().find_elements(by=by, value=value)
  306. if len(elements) > 0:
  307. find_element = True
  308. # 完成操作后切回主文档
  309. # super().switch_to.default_content()
  310. if find_element:
  311. return elements
  312. except NoSuchElementException as e:
  313. print(f"No such element found in the iframe: {str(e)}")
  314. except Exception as e:
  315. print(f"Exception: {str(e)}")
  316. if not find_element:
  317. raise NoSuchElementException
  318. else:
  319. return super().find_elements(by=by, value=value)