cookies.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. from __future__ import annotations
  2. import os
  3. import time
  4. import json
  5. try:
  6. from platformdirs import user_config_dir
  7. has_platformdirs = True
  8. except ImportError:
  9. has_platformdirs = False
  10. try:
  11. from browser_cookie3 import (
  12. chrome, chromium, opera, opera_gx,
  13. brave, edge, vivaldi, firefox,
  14. _LinuxPasswordManager, BrowserCookieError
  15. )
  16. has_browser_cookie3 = True
  17. except ImportError:
  18. has_browser_cookie3 = False
  19. from .typing import Dict, Cookies
  20. from .errors import MissingRequirementsError
  21. from . import debug
  22. class CookiesConfig():
  23. cookies: Dict[str, Cookies] = {}
  24. cookies_dir: str = "./har_and_cookies"
  25. DOMAINS = [
  26. ".bing.com",
  27. ".meta.ai",
  28. ".google.com",
  29. "www.whiterabbitneo.com",
  30. "huggingface.co",
  31. "chat.reka.ai",
  32. ]
  33. if has_browser_cookie3 and os.environ.get('DBUS_SESSION_BUS_ADDRESS') == "/dev/null":
  34. _LinuxPasswordManager.get_password = lambda a, b: b"secret"
  35. def get_cookies(domain_name: str = '', raise_requirements_error: bool = True, single_browser: bool = False) -> Dict[str, str]:
  36. """
  37. Load cookies for a given domain from all supported browsers and cache the results.
  38. Args:
  39. domain_name (str): The domain for which to load cookies.
  40. Returns:
  41. Dict[str, str]: A dictionary of cookie names and values.
  42. """
  43. if domain_name in CookiesConfig.cookies:
  44. return CookiesConfig.cookies[domain_name]
  45. cookies = load_cookies_from_browsers(domain_name, raise_requirements_error, single_browser)
  46. CookiesConfig.cookies[domain_name] = cookies
  47. return cookies
  48. def set_cookies(domain_name: str, cookies: Cookies = None) -> None:
  49. if cookies:
  50. CookiesConfig.cookies[domain_name] = cookies
  51. elif domain_name in CookiesConfig.cookies:
  52. CookiesConfig.cookies.pop(domain_name)
  53. def load_cookies_from_browsers(domain_name: str, raise_requirements_error: bool = True, single_browser: bool = False) -> Cookies:
  54. """
  55. Helper function to load cookies from various browsers.
  56. Args:
  57. domain_name (str): The domain for which to load cookies.
  58. Returns:
  59. Dict[str, str]: A dictionary of cookie names and values.
  60. """
  61. if not has_browser_cookie3:
  62. if raise_requirements_error:
  63. raise MissingRequirementsError('Install "browser_cookie3" package')
  64. return {}
  65. cookies = {}
  66. for cookie_fn in [_g4f, chrome, chromium, opera, opera_gx, brave, edge, vivaldi, firefox]:
  67. try:
  68. cookie_jar = cookie_fn(domain_name=domain_name)
  69. if len(cookie_jar) and debug.logging:
  70. print(f"Read cookies from {cookie_fn.__name__} for {domain_name}")
  71. for cookie in cookie_jar:
  72. if cookie.name not in cookies:
  73. if not cookie.expires or cookie.expires > time.time():
  74. cookies[cookie.name] = cookie.value
  75. if single_browser and len(cookie_jar):
  76. break
  77. except BrowserCookieError:
  78. pass
  79. except Exception as e:
  80. if debug.logging:
  81. print(f"Error reading cookies from {cookie_fn.__name__} for {domain_name}: {e}")
  82. return cookies
  83. def set_cookies_dir(dir: str) -> None:
  84. CookiesConfig.cookies_dir = dir
  85. def get_cookies_dir() -> str:
  86. return CookiesConfig.cookies_dir
  87. def read_cookie_files(dirPath: str = None):
  88. def get_domain(v: dict) -> str:
  89. host = [h["value"] for h in v['request']['headers'] if h["name"].lower() in ("host", ":authority")]
  90. if not host:
  91. return
  92. host = host.pop()
  93. for d in DOMAINS:
  94. if d in host:
  95. return d
  96. harFiles = []
  97. cookieFiles = []
  98. for root, dirs, files in os.walk(CookiesConfig.cookies_dir if dirPath is None else dirPath):
  99. for file in files:
  100. if file.endswith(".har"):
  101. harFiles.append(os.path.join(root, file))
  102. elif file.endswith(".json"):
  103. cookieFiles.append(os.path.join(root, file))
  104. CookiesConfig.cookies = {}
  105. for path in harFiles:
  106. with open(path, 'rb') as file:
  107. try:
  108. harFile = json.load(file)
  109. except json.JSONDecodeError:
  110. # Error: not a HAR file!
  111. continue
  112. if debug.logging:
  113. print("Read .har file:", path)
  114. new_cookies = {}
  115. for v in harFile['log']['entries']:
  116. domain = get_domain(v)
  117. if domain is None:
  118. continue
  119. v_cookies = {}
  120. for c in v['request']['cookies']:
  121. v_cookies[c['name']] = c['value']
  122. if len(v_cookies) > 0:
  123. CookiesConfig.cookies[domain] = v_cookies
  124. new_cookies[domain] = len(v_cookies)
  125. if debug.logging:
  126. for domain, new_values in new_cookies.items():
  127. print(f"Cookies added: {new_values} from {domain}")
  128. for path in cookieFiles:
  129. with open(path, 'rb') as file:
  130. try:
  131. cookieFile = json.load(file)
  132. except json.JSONDecodeError:
  133. # Error: not a json file!
  134. continue
  135. if not isinstance(cookieFile, list):
  136. continue
  137. if debug.logging:
  138. print("Read cookie file:", path)
  139. new_cookies = {}
  140. for c in cookieFile:
  141. if isinstance(c, dict) and "domain" in c:
  142. if c["domain"] not in new_cookies:
  143. new_cookies[c["domain"]] = {}
  144. new_cookies[c["domain"]][c["name"]] = c["value"]
  145. for domain, new_values in new_cookies.items():
  146. if debug.logging:
  147. print(f"Cookies added: {len(new_values)} from {domain}")
  148. CookiesConfig.cookies[domain] = new_values
  149. def _g4f(domain_name: str) -> list:
  150. """
  151. Load cookies from the 'g4f' browser (if exists).
  152. Args:
  153. domain_name (str): The domain for which to load cookies.
  154. Returns:
  155. list: List of cookies.
  156. """
  157. if not has_platformdirs:
  158. return []
  159. user_data_dir = user_config_dir("g4f")
  160. cookie_file = os.path.join(user_data_dir, "Default", "Cookies")
  161. return [] if not os.path.exists(cookie_file) else chrome(cookie_file, domain_name)