__init__.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. from __future__ import annotations
  2. try:
  3. from curl_cffi.requests import Session, Response
  4. from .curl_cffi import StreamResponse, StreamSession, FormData
  5. has_curl_cffi = True
  6. except ImportError:
  7. from typing import Type as Session, Type as Response
  8. from .aiohttp import StreamResponse, StreamSession, FormData
  9. has_curl_cffi = False
  10. try:
  11. import webview
  12. import asyncio
  13. has_webview = True
  14. except ImportError:
  15. has_webview = False
  16. from .raise_for_status import raise_for_status
  17. from ..webdriver import WebDriver, WebDriverSession
  18. from ..webdriver import bypass_cloudflare, get_driver_cookies
  19. from ..errors import MissingRequirementsError
  20. from .defaults import DEFAULT_HEADERS, WEBVIEW_HAEDERS
  21. async def get_args_from_webview(url: str) -> dict:
  22. if not has_webview:
  23. raise MissingRequirementsError('Install "webview" package')
  24. window = webview.create_window("", url, hidden=True)
  25. await asyncio.sleep(2)
  26. body = None
  27. while body is None:
  28. try:
  29. await asyncio.sleep(1)
  30. body = window.dom.get_element("body:not(.no-js)")
  31. except:
  32. ...
  33. headers = {
  34. **WEBVIEW_HAEDERS,
  35. "User-Agent": window.evaluate_js("this.navigator.userAgent"),
  36. "Accept-Language": window.evaluate_js("this.navigator.language"),
  37. "Referer": window.real_url
  38. }
  39. cookies = [list(*cookie.items()) for cookie in window.get_cookies()]
  40. cookies = {name: cookie.value for name, cookie in cookies}
  41. window.destroy()
  42. return {"headers": headers, "cookies": cookies}
  43. def get_args_from_browser(
  44. url: str,
  45. webdriver: WebDriver = None,
  46. proxy: str = None,
  47. timeout: int = 120,
  48. do_bypass_cloudflare: bool = True,
  49. virtual_display: bool = False
  50. ) -> dict:
  51. """
  52. Create a Session object using a WebDriver to handle cookies and headers.
  53. Args:
  54. url (str): The URL to navigate to using the WebDriver.
  55. webdriver (WebDriver, optional): The WebDriver instance to use.
  56. proxy (str, optional): Proxy server to use for the Session.
  57. timeout (int, optional): Timeout in seconds for the WebDriver.
  58. Returns:
  59. Session: A Session object configured with cookies and headers from the WebDriver.
  60. """
  61. with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=virtual_display) as driver:
  62. if do_bypass_cloudflare:
  63. bypass_cloudflare(driver, url, timeout)
  64. headers = {
  65. **DEFAULT_HEADERS,
  66. 'referer': url,
  67. }
  68. if not hasattr(driver, "requests"):
  69. headers["user-agent"] = driver.execute_script("return navigator.userAgent")
  70. else:
  71. for request in driver.requests:
  72. if request.url.startswith(url):
  73. for key, value in request.headers.items():
  74. if key in (
  75. "accept-encoding",
  76. "accept-language",
  77. "user-agent",
  78. "sec-ch-ua",
  79. "sec-ch-ua-platform",
  80. "sec-ch-ua-arch",
  81. "sec-ch-ua-full-version",
  82. "sec-ch-ua-platform-version",
  83. "sec-ch-ua-bitness"
  84. ):
  85. headers[key] = value
  86. break
  87. cookies = get_driver_cookies(driver)
  88. return {
  89. 'cookies': cookies,
  90. 'headers': headers,
  91. }
  92. def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
  93. if not has_curl_cffi:
  94. raise MissingRequirementsError('Install "curl_cffi" package')
  95. args = get_args_from_browser(url, webdriver, proxy, timeout)
  96. return Session(
  97. **args,
  98. proxies={"https": proxy, "http": proxy},
  99. timeout=timeout,
  100. impersonate="chrome"
  101. )