sps.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. package h264
  2. import (
  3. "fmt"
  4. "github.com/AlexxIT/go2rtc/pkg/bits"
  5. )
  6. // http://www.itu.int/rec/T-REC-H.264
  7. // https://webrtc.googlesource.com/src/+/refs/heads/main/common_video/h264/sps_parser.cc
  8. //goland:noinspection GoSnakeCaseUsage
  9. type SPS struct {
  10. profile_idc uint8
  11. profile_iop uint8
  12. level_idc uint8
  13. seq_parameter_set_id uint32
  14. chroma_format_idc uint32
  15. separate_colour_plane_flag byte
  16. bit_depth_luma_minus8 uint32
  17. bit_depth_chroma_minus8 uint32
  18. qpprime_y_zero_transform_bypass_flag byte
  19. seq_scaling_matrix_present_flag byte
  20. log2_max_frame_num_minus4 uint32
  21. pic_order_cnt_type uint32
  22. log2_max_pic_order_cnt_lsb_minus4 uint32
  23. delta_pic_order_always_zero_flag byte
  24. offset_for_non_ref_pic int32
  25. offset_for_top_to_bottom_field int32
  26. num_ref_frames_in_pic_order_cnt_cycle uint32
  27. num_ref_frames uint32
  28. gaps_in_frame_num_value_allowed_flag byte
  29. pic_width_in_mbs_minus_1 uint32
  30. pic_height_in_map_units_minus_1 uint32
  31. frame_mbs_only_flag byte
  32. mb_adaptive_frame_field_flag byte
  33. direct_8x8_inference_flag byte
  34. frame_cropping_flag byte
  35. frame_crop_left_offset uint32
  36. frame_crop_right_offset uint32
  37. frame_crop_top_offset uint32
  38. frame_crop_bottom_offset uint32
  39. vui_parameters_present_flag byte
  40. aspect_ratio_info_present_flag byte
  41. aspect_ratio_idc byte
  42. sar_width uint16
  43. sar_height uint16
  44. overscan_info_present_flag byte
  45. overscan_appropriate_flag byte
  46. video_signal_type_present_flag byte
  47. video_format uint8
  48. video_full_range_flag byte
  49. colour_description_present_flag byte
  50. colour_description uint32
  51. chroma_loc_info_present_flag byte
  52. chroma_sample_loc_type_top_field uint32
  53. chroma_sample_loc_type_bottom_field uint32
  54. timing_info_present_flag byte
  55. num_units_in_tick uint32
  56. time_scale uint32
  57. fixed_frame_rate_flag byte
  58. }
  59. func (s *SPS) Width() uint16 {
  60. width := 16 * (s.pic_width_in_mbs_minus_1 + 1)
  61. crop := 2 * (s.frame_crop_left_offset + s.frame_crop_right_offset)
  62. return uint16(width - crop)
  63. }
  64. func (s *SPS) Height() uint16 {
  65. height := 16 * (s.pic_height_in_map_units_minus_1 + 1)
  66. crop := 2 * (s.frame_crop_top_offset + s.frame_crop_bottom_offset)
  67. if s.frame_mbs_only_flag == 0 {
  68. height *= 2
  69. }
  70. return uint16(height - crop)
  71. }
  72. func DecodeSPS(sps []byte) *SPS {
  73. r := bits.NewReader(sps)
  74. hdr := r.ReadByte()
  75. if hdr&0x1F != NALUTypeSPS {
  76. return nil
  77. }
  78. s := &SPS{
  79. profile_idc: r.ReadByte(),
  80. profile_iop: r.ReadByte(),
  81. level_idc: r.ReadByte(),
  82. seq_parameter_set_id: r.ReadUEGolomb(),
  83. }
  84. switch s.profile_idc {
  85. case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135:
  86. n := byte(8)
  87. s.chroma_format_idc = r.ReadUEGolomb()
  88. if s.chroma_format_idc == 3 {
  89. s.separate_colour_plane_flag = r.ReadBit()
  90. n = 12
  91. }
  92. s.bit_depth_luma_minus8 = r.ReadUEGolomb()
  93. s.bit_depth_chroma_minus8 = r.ReadUEGolomb()
  94. s.qpprime_y_zero_transform_bypass_flag = r.ReadBit()
  95. s.seq_scaling_matrix_present_flag = r.ReadBit()
  96. if s.seq_scaling_matrix_present_flag != 0 {
  97. for i := byte(0); i < n; i++ {
  98. //goland:noinspection GoSnakeCaseUsage
  99. seq_scaling_list_present_flag := r.ReadBit()
  100. if seq_scaling_list_present_flag != 0 {
  101. if i < 6 {
  102. s.scaling_list(r, 16)
  103. } else {
  104. s.scaling_list(r, 64)
  105. }
  106. }
  107. }
  108. }
  109. }
  110. s.log2_max_frame_num_minus4 = r.ReadUEGolomb()
  111. s.pic_order_cnt_type = r.ReadUEGolomb()
  112. switch s.pic_order_cnt_type {
  113. case 0:
  114. s.log2_max_pic_order_cnt_lsb_minus4 = r.ReadUEGolomb()
  115. case 1:
  116. s.delta_pic_order_always_zero_flag = r.ReadBit()
  117. s.offset_for_non_ref_pic = r.ReadSEGolomb()
  118. s.offset_for_top_to_bottom_field = r.ReadSEGolomb()
  119. s.num_ref_frames_in_pic_order_cnt_cycle = r.ReadUEGolomb()
  120. for i := uint32(0); i < s.num_ref_frames_in_pic_order_cnt_cycle; i++ {
  121. _ = r.ReadSEGolomb() // offset_for_ref_frame[i]
  122. }
  123. }
  124. s.num_ref_frames = r.ReadUEGolomb()
  125. s.gaps_in_frame_num_value_allowed_flag = r.ReadBit()
  126. s.pic_width_in_mbs_minus_1 = r.ReadUEGolomb()
  127. s.pic_height_in_map_units_minus_1 = r.ReadUEGolomb()
  128. s.frame_mbs_only_flag = r.ReadBit()
  129. if s.frame_mbs_only_flag == 0 {
  130. s.mb_adaptive_frame_field_flag = r.ReadBit()
  131. }
  132. s.direct_8x8_inference_flag = r.ReadBit()
  133. s.frame_cropping_flag = r.ReadBit()
  134. if s.frame_cropping_flag != 0 {
  135. s.frame_crop_left_offset = r.ReadUEGolomb()
  136. s.frame_crop_right_offset = r.ReadUEGolomb()
  137. s.frame_crop_top_offset = r.ReadUEGolomb()
  138. s.frame_crop_bottom_offset = r.ReadUEGolomb()
  139. }
  140. s.vui_parameters_present_flag = r.ReadBit()
  141. if s.vui_parameters_present_flag != 0 {
  142. s.aspect_ratio_info_present_flag = r.ReadBit()
  143. if s.aspect_ratio_info_present_flag != 0 {
  144. s.aspect_ratio_idc = r.ReadByte()
  145. if s.aspect_ratio_idc == 255 {
  146. s.sar_width = r.ReadUint16()
  147. s.sar_height = r.ReadUint16()
  148. }
  149. }
  150. s.overscan_info_present_flag = r.ReadBit()
  151. if s.overscan_info_present_flag != 0 {
  152. s.overscan_appropriate_flag = r.ReadBit()
  153. }
  154. s.video_signal_type_present_flag = r.ReadBit()
  155. if s.video_signal_type_present_flag != 0 {
  156. s.video_format = r.ReadBits8(3)
  157. s.video_full_range_flag = r.ReadBit()
  158. s.colour_description_present_flag = r.ReadBit()
  159. if s.colour_description_present_flag != 0 {
  160. s.colour_description = r.ReadUint24()
  161. }
  162. }
  163. s.chroma_loc_info_present_flag = r.ReadBit()
  164. if s.chroma_loc_info_present_flag != 0 {
  165. s.chroma_sample_loc_type_top_field = r.ReadUEGolomb()
  166. s.chroma_sample_loc_type_bottom_field = r.ReadUEGolomb()
  167. }
  168. s.timing_info_present_flag = r.ReadBit()
  169. if s.timing_info_present_flag != 0 {
  170. s.num_units_in_tick = r.ReadUint32()
  171. s.time_scale = r.ReadUint32()
  172. s.fixed_frame_rate_flag = r.ReadBit()
  173. }
  174. //...
  175. }
  176. if r.EOF {
  177. return nil
  178. }
  179. return s
  180. }
  181. //goland:noinspection GoSnakeCaseUsage
  182. func (s *SPS) scaling_list(r *bits.Reader, sizeOfScalingList int) {
  183. lastScale := int32(8)
  184. nextScale := int32(8)
  185. for j := 0; j < sizeOfScalingList; j++ {
  186. if nextScale != 0 {
  187. delta_scale := r.ReadSEGolomb()
  188. nextScale = (lastScale + delta_scale + 256) % 256
  189. }
  190. if nextScale != 0 {
  191. lastScale = nextScale
  192. }
  193. }
  194. }
  195. func (s *SPS) Profile() string {
  196. switch s.profile_idc {
  197. case 0x42:
  198. return "Baseline"
  199. case 0x4D:
  200. return "Main"
  201. case 0x58:
  202. return "Extended"
  203. case 0x64:
  204. return "High"
  205. }
  206. return fmt.Sprintf("0x%02X", s.profile_idc)
  207. }
  208. func (s *SPS) PixFmt() string {
  209. if s.bit_depth_luma_minus8 == 0 {
  210. switch s.chroma_format_idc {
  211. case 1:
  212. if s.video_full_range_flag == 1 {
  213. return "yuvj420p"
  214. }
  215. return "yuv420p"
  216. case 2:
  217. return "yuv422p"
  218. case 3:
  219. return "yuv444p"
  220. }
  221. }
  222. return ""
  223. }
  224. func (s *SPS) String() string {
  225. return fmt.Sprintf(
  226. "%s %d.%d, %s, %dx%d",
  227. s.Profile(), s.level_idc/10, s.level_idc%10, s.PixFmt(), s.Width(), s.Height(),
  228. )
  229. }
  230. // FixPixFmt - change yuvj420p to yuv420p in SPS
  231. // same as "-c:v copy -bsf:v h264_metadata=video_full_range_flag=0"
  232. func FixPixFmt(sps []byte) {
  233. r := bits.NewReader(sps)
  234. _ = r.ReadByte()
  235. profile := r.ReadByte()
  236. _ = r.ReadByte()
  237. _ = r.ReadByte()
  238. _ = r.ReadUEGolomb()
  239. switch profile {
  240. case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135:
  241. n := byte(8)
  242. if r.ReadUEGolomb() == 3 {
  243. _ = r.ReadBit()
  244. n = 12
  245. }
  246. _ = r.ReadUEGolomb()
  247. _ = r.ReadUEGolomb()
  248. _ = r.ReadBit()
  249. if r.ReadBit() != 0 {
  250. for i := byte(0); i < n; i++ {
  251. if r.ReadBit() != 0 {
  252. return // skip
  253. }
  254. }
  255. }
  256. }
  257. _ = r.ReadUEGolomb()
  258. switch r.ReadUEGolomb() {
  259. case 0:
  260. _ = r.ReadUEGolomb()
  261. case 1:
  262. _ = r.ReadBit()
  263. _ = r.ReadSEGolomb()
  264. _ = r.ReadSEGolomb()
  265. n := r.ReadUEGolomb()
  266. for i := uint32(0); i < n; i++ {
  267. _ = r.ReadSEGolomb()
  268. }
  269. }
  270. _ = r.ReadUEGolomb()
  271. _ = r.ReadBit()
  272. _ = r.ReadUEGolomb()
  273. _ = r.ReadUEGolomb()
  274. if r.ReadBit() == 0 {
  275. _ = r.ReadBit()
  276. }
  277. _ = r.ReadBit()
  278. if r.ReadBit() != 0 {
  279. _ = r.ReadUEGolomb()
  280. _ = r.ReadUEGolomb()
  281. _ = r.ReadUEGolomb()
  282. _ = r.ReadUEGolomb()
  283. }
  284. if r.ReadBit() != 0 {
  285. if r.ReadBit() != 0 {
  286. if r.ReadByte() == 255 {
  287. _ = r.ReadUint16()
  288. _ = r.ReadUint16()
  289. }
  290. }
  291. if r.ReadBit() != 0 {
  292. _ = r.ReadBit()
  293. }
  294. if r.ReadBit() != 0 {
  295. _ = r.ReadBits8(3)
  296. if r.ReadBit() == 1 {
  297. pos, bit := r.Pos()
  298. sps[pos] &= ^byte(1 << bit)
  299. }
  300. }
  301. }
  302. }