dy2.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. #!/usr/bin/python
  2. # coding:utf-8
  3. # @FileName: liveMan.py
  4. # @Time: 2024/1/2 21:51
  5. # @Author: bubu
  6. # @Project: douyinLiveWebFetcher
  7. import gzip
  8. import random
  9. import re
  10. import string
  11. import requests
  12. import websocket
  13. from protobuf.douyin import *
  14. def generateMsToken(length=107):
  15. """
  16. 产生请求头部cookie中的msToken字段,其实为随机的107位字符
  17. :param length:字符位数
  18. :return:msToken
  19. """
  20. random_str = ''
  21. base_str = string.ascii_letters + string.digits + '=_'
  22. _len = len(base_str) - 1
  23. for _ in range(length):
  24. random_str += base_str[random.randint(0, _len)]
  25. return random_str
  26. def generateTtwid():
  27. """
  28. 产生请求头部cookie中的ttwid字段,访问抖音网页版直播间首页可以获取到响应cookie中的ttwid
  29. :return: ttwid
  30. """
  31. url = "https://live.douyin.com/"
  32. headers = {
  33. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
  34. "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  35. }
  36. try:
  37. response = requests.get(url, headers=headers)
  38. response.raise_for_status()
  39. except Exception as err:
  40. print("【X】request the live url error: ", err)
  41. else:
  42. return response.cookies.get('ttwid')
  43. class DouyinLiveWebFetcher:
  44. def __init__(self, live_id):
  45. """
  46. 直播间弹幕抓取对象
  47. :param live_id: 直播间的直播id,打开直播间web首页的链接如:https://live.douyin.com/261378947940,
  48. 其中的261378947940即是live_id
  49. """
  50. self.__ttwid = None
  51. self.__room_id = None
  52. self.is_connected = None
  53. self.live_id = live_id
  54. self.live_url = "https://live.douyin.com/"
  55. self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " \
  56. "Chrome/120.0.0.0 Safari/537.36"
  57. def send_heartbeat(self, ws):
  58. import time, threading
  59. def heartbeat():
  60. while True:
  61. time.sleep(15)#每15秒发送一次心跳
  62. if self.is_connected:
  63. ws.send("hi")#使用实际的心跳消息格式
  64. else:
  65. print( "Connection lost, stopping heartbeat.")
  66. return
  67. threading.Thread(target=heartbeat).start()
  68. def start(self):
  69. self._connectWebSocket()
  70. def stop(self):
  71. self.ws.close()
  72. @property
  73. def ttwid(self):
  74. """
  75. 产生请求头部cookie中的ttwid字段,访问抖音网页版直播间首页可以获取到响应cookie中的ttwid
  76. :return: ttwid
  77. """
  78. if self.__ttwid:
  79. return self.__ttwid
  80. headers = {
  81. "User-Agent": self.user_agent,
  82. }
  83. try:
  84. response = requests.get(self.live_url, headers=headers)
  85. response.raise_for_status()
  86. except Exception as err:
  87. print("【X】Request the live url error: ", err)
  88. else:
  89. self.__ttwid = response.cookies.get('ttwid')
  90. return self.__ttwid
  91. @property
  92. def room_id(self):
  93. """
  94. 根据直播间的地址获取到真正的直播间roomId,有时会有错误,可以重试请求解决
  95. :return:room_id
  96. """
  97. if self.__room_id:
  98. return self.__room_id
  99. url = self.live_url + self.live_id
  100. headers = {
  101. "User-Agent": self.user_agent,
  102. "cookie": f"ttwid={self.ttwid}&msToken={generateMsToken()}; __ac_nonce=0123407cc00a9e438deb4",
  103. }
  104. try:
  105. response = requests.get(url, headers=headers)
  106. response.raise_for_status()
  107. except Exception as err:
  108. print("【X】Request the live room url error: ", err)
  109. else:
  110. match = re.search(r'roomId\\":\\"(\d+)\\"', response.text)
  111. if match is None or len(match.groups()) < 1:
  112. print("【X】No match found for roomId")
  113. self.__room_id = match.group(1)
  114. return self.__room_id
  115. def _connectWebSocket(self):
  116. """
  117. 连接抖音直播间websocket服务器,请求直播间数据
  118. """
  119. # wss = f"wss://webcast3-ws-web-lq.douyin.com/webcast/im/push/v2/?" \
  120. # f"app_name=douyin_web&version_code=180800&webcast_sdk_version=1.3.0&update_version_code=1.3.0" \
  121. # f"&compress=gzip" \
  122. # f"&internal_ext=internal_src:dim|wss_push_room_id:{self.room_id}|wss_push_did:{self.room_id}" \
  123. # f"|dim_log_id:202302171547011A160A7BAA76660E13ED|fetch_time:1676620021641|seq:1|wss_info:0-1676" \
  124. # f"620021641-0-0|wrds_kvs:WebcastRoomStatsMessage-1676620020691146024_WebcastRoomRankMessage-167661" \
  125. # f"9972726895075_AudienceGiftSyncData-1676619980834317696_HighlightContainerSyncData-2&cursor=t-1676" \
  126. # f"620021641_r-1_d-1_u-1_h-1" \
  127. # f"&host=https://live.douyin.com&aid=6383&live_id=1" \
  128. # f"&did_rule=3&debug=false&endpoint=live_pc&support_wrds=1&" \
  129. # f"im_path=/webcast/im/fetch/&user_unique_id={self.room_id}&" \
  130. # f"device_platform=web&cookie_enabled=true&screen_width=1440&screen_height=900&browser_language=zh&" \
  131. # f"browser_platform=MacIntel&browser_name=Mozilla&" \
  132. # f"browser_version=5.0%20(Macintosh;%20Intel%20Mac%20OS%20X%2010_15_7)%20AppleWebKit/537.36%20(KHTML,%20" \
  133. # f"like%20Gecko)%20Chrome/110.0.0.0%20Safari/537.36&" \
  134. # f"browser_online=true&tz_name=Asia/Shanghai&identity=audience&" \
  135. # f"room_id={self.room_id}&heartbeatDuration=0&signature=00000000"
  136. wss = "wss://webcast5-ws-web-lq.douyin.com/webcast/im/push/v2/?app_name=douyin_web&version_code=180800&webcast_sdk_version=1.0.14-beta.0&update_version_code=1.0.14-beta.0&compress=gzip&device_platform=web&cookie_enabled=true&screen_width=2048&screen_height=1152&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_version=5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/126.0.0.0%20Safari/537.36%20Edg/126.0.0.0&browser_online=true&tz_name=Etc/GMT-8&cursor=h-7383323426352862262_t-1719063974519_r-1_d-1_u-1&internal_ext=internal_src:dim|wss_push_room_id:7383264938631973686|wss_push_did:7293153952199050788|first_req_ms:1719063974385|fetch_time:1719063974519|seq:1|wss_info:0-1719063974519-0-0|wrds_v:7383323492227230262&host=https://live.douyin.com&aid=6383&live_id=1&did_rule=3&endpoint=live_pc&support_wrds=1&user_unique_id=7293153952199050788&im_path=/webcast/im/fetch/&identity=audience&need_persist_msg_count=15&insert_task_id=&live_reason=&room_id=7383264938631973686&heartbeatDuration=0&signature=6DJMtCOOuubiYZP4"
  137. headers = {
  138. "cookie": f"ttwid={self.ttwid}",
  139. 'user-agent': self.user_agent,
  140. }
  141. self.ws = websocket.WebSocketApp(wss,
  142. header=headers,
  143. on_open=self._wsOnOpen,
  144. on_message=self._wsOnMessage,
  145. on_error=self._wsOnError,
  146. on_close=self._wsOnClose)
  147. try:
  148. self.ws.run_forever()
  149. except Exception:
  150. self.stop()
  151. raise
  152. def _wsOnOpen(self, ws):
  153. """
  154. 连接建立成功
  155. """
  156. print("WebSocket connected.")
  157. self.is_connected = True
  158. def _wsOnMessage(self, ws, message):
  159. """
  160. 接收到数据
  161. :param ws: websocket实例
  162. :param message: 数据
  163. """
  164. # 根据proto结构体解析对象
  165. package = PushFrame().parse(message)
  166. response = Response().parse(gzip.decompress(package.payload))
  167. # 返回直播间服务器链接存活确认消息,便于持续获取数据
  168. if response.need_ack:
  169. ack = PushFrame(log_id=package.log_id,
  170. payload_type='ack',
  171. payload=response.internal_ext.encode('utf-8')
  172. ).SerializeToString()
  173. ws.send(ack, websocket.ABNF.OPCODE_BINARY)
  174. # 根据消息类别解析消息体
  175. for msg in response.messages_list:
  176. method = msg.method
  177. try:
  178. {
  179. 'WebcastChatMessage': self._parseChatMsg, # 聊天消息
  180. 'WebcastGiftMessage': self._parseGiftMsg, # 礼物消息
  181. 'WebcastLikeMessage': self._parseLikeMsg, # 点赞消息
  182. 'WebcastMemberMessage': self._parseMemberMsg, # 进入直播间消息
  183. 'WebcastSocialMessage': self._parseSocialMsg, # 关注消息
  184. 'WebcastRoomUserSeqMessage': self._parseRoomUserSeqMsg, # 直播间统计
  185. 'WebcastFansclubMessage': self._parseFansclubMsg, # 粉丝团消息
  186. 'WebcastControlMessage': self._parseControlMsg, # 直播间状态消息
  187. 'WebcastEmojiChatMessage': self._parseEmojiChatMsg, # 聊天表情包消息
  188. 'WebcastRoomStatsMessage': self._parseRoomStatsMsg, # 直播间统计信息
  189. 'WebcastRoomMessage': self._parseRoomMsg, # 直播间信息
  190. 'WebcastRoomRankMessage': self._parseRankMsg, # 直播间排行榜信息
  191. }.get(method)(msg.payload)
  192. except Exception:
  193. pass
  194. def _wsOnError(self, ws, error):
  195. print("WebSocket error: ", error)
  196. self.is_connected = False
  197. def _wsOnClose(self, ws):
  198. print("WebSocket connection closed.")
  199. self.is_connected = False
  200. def _parseChatMsg(self, payload):
  201. '''聊天消息'''
  202. message = ChatMessage().parse(payload)
  203. user_name = message.user.nick_name
  204. user_id = message.user.id
  205. content = message.content
  206. print(f"【聊天msg】[{user_id}]{user_name}: {content}")
  207. def _parseGiftMsg(self, payload):
  208. '''礼物消息'''
  209. message = GiftMessage().parse(payload)
  210. user_name = message.user.nick_name
  211. gift_name = message.gift.name
  212. gift_cnt = message.combo_count
  213. print(f"【礼物msg】{user_name} 送出了 {gift_name}x{gift_cnt}")
  214. def _parseLikeMsg(self, payload):
  215. '''点赞消息'''
  216. message = LikeMessage().parse(payload)
  217. user_name = message.user.nick_name
  218. count = message.count
  219. print(f"【点赞msg】{user_name} 点了{count}个赞")
  220. def _parseMemberMsg(self, payload):
  221. '''进入直播间消息'''
  222. message = MemberMessage().parse(payload)
  223. user_name = message.user.nick_name
  224. user_id = message.user.id
  225. gender = ["女", "男"][message.user.gender]
  226. print(f"【进场msg】[{user_id}][{gender}]{user_name} 进入了直播间")
  227. def _parseSocialMsg(self, payload):
  228. '''关注消息'''
  229. message = SocialMessage().parse(payload)
  230. user_name = message.user.nick_name
  231. user_id = message.user.id
  232. print(f"【关注msg】[{user_id}]{user_name} 关注了主播")
  233. def _parseRoomUserSeqMsg(self, payload):
  234. '''直播间统计'''
  235. message = RoomUserSeqMessage().parse(payload)
  236. current = message.total
  237. total = message.total_pv_for_anchor
  238. print(f"【统计msg】当前观看人数: {current}, 累计观看人数: {total}")
  239. def _parseFansclubMsg(self, payload):
  240. '''粉丝团消息'''
  241. message = FansclubMessage().parse(payload)
  242. content = message.content
  243. print(f"【粉丝团msg】 {content}")
  244. def _parseEmojiChatMsg(self, payload):
  245. '''聊天表情包消息'''
  246. message = EmojiChatMessage().parse(payload)
  247. emoji_id = message.emoji_id
  248. user = message.user
  249. common = message.common
  250. default_content = message.default_content
  251. print(f"【聊天表情包id】 {emoji_id},user:{user},common:{common},default_content:{default_content}")
  252. def _parseRoomMsg(self, payload):
  253. message = RoomMessage().parse(payload)
  254. common = message.common
  255. room_id = common.room_id
  256. print(f"【直播间msg】直播间id:{room_id}")
  257. def _parseRoomStatsMsg(self, payload):
  258. message = RoomStatsMessage().parse(payload)
  259. display_long = message.display_long
  260. print(f"【直播间统计msg】{display_long}")
  261. def _parseRankMsg(self, payload):
  262. message = RoomRankMessage().parse(payload)
  263. ranks_list = message.ranks_list
  264. print(f"【直播间排行榜msg】{ranks_list}")
  265. def _parseControlMsg(self, payload):
  266. '''直播间状态消息'''
  267. message = ControlMessage().parse(payload)
  268. if message.status == 3:
  269. print("直播间已结束")
  270. self.stop()
  271. if __name__ == '__main__':
  272. live_id = '386798490464'
  273. DouyinLiveWebFetcher(live_id).start()