Source: lib/text/mp4_vtt_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.text.Mp4VttParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.log');
  9. goog.require('shaka.text.Cue');
  10. goog.require('shaka.text.TextEngine');
  11. goog.require('shaka.text.VttTextParser');
  12. goog.require('shaka.util.DataViewReader');
  13. goog.require('shaka.util.Error');
  14. goog.require('shaka.util.Functional');
  15. goog.require('shaka.util.Mp4Parser');
  16. goog.require('shaka.util.Mp4BoxParsers');
  17. goog.require('shaka.util.StringUtils');
  18. goog.require('shaka.util.TextParser');
  /**
   * Text parser for WebVTT cues carried inside MP4 segments (the "wvtt"
   * sample entry).  parseInit() reads the media timescale from an init
   * segment; parseMedia() then turns each media segment into cues.
   *
   * @implements {shaka.extern.TextParser}
   * @export
   */
  shaka.text.Mp4VttParser = class {
    /** */
    constructor() {
      /**
       * The current time scale used by the VTT parser.  Read from the MDHD
       * box of the init segment; null until parseInit() has seen one.
       *
       * @type {?number}
       * @private
       */
      this.timescale_ = null;
    }

    /**
     * Reads the timescale from the MDHD box of the init segment and checks
     * that the segment declares a WVTT sample entry.  Throws
     * INVALID_MP4_VTT if either is missing.
     *
     * @override
     * @export
     */
    parseInit(data) {
      const Mp4Parser = shaka.util.Mp4Parser;
      const Mp4BoxParsers = shaka.util.Mp4BoxParsers;

      let sawWVTT = false;

      new Mp4Parser()
          .box('moov', Mp4Parser.children)
          .box('trak', Mp4Parser.children)
          .box('mdia', Mp4Parser.children)
          .fullBox('mdhd', (box) => {
            goog.asserts.assert(
                box.version == 0 || box.version == 1,
                'MDHD version can only be 0 or 1');
            const parsedMDHDBox = Mp4BoxParsers.parseMDHD(
                box.reader, box.version);
            this.timescale_ = parsedMDHDBox.timescale;
          })
          .box('minf', Mp4Parser.children)
          .box('stbl', Mp4Parser.children)
          .fullBox('stsd', Mp4Parser.sampleDescription)
          .box('wvtt', (box) => {
            // A valid vtt init segment, though we have no actual subtitles
            // yet.
            sawWVTT = true;
          }).parse(data);

      if (!this.timescale_) {
        // Missing timescale for VTT content. It should be located in the
        // MDHD.
        throw shaka.text.Mp4VttParser.invalidMp4VttError_();
      }

      if (!sawWVTT) {
        // A WVTT box should have been seen (a valid vtt init segment with
        // no actual subtitles).
        throw shaka.text.Mp4VttParser.invalidMp4VttError_();
      }
    }

    /**
     * @override
     * @export
     */
    setSequenceMode(sequenceMode) {
      // Unused.
    }

    /**
     * @override
     * @export
     */
    setManifestType(manifestType) {
      // Unused.
    }

    /**
     * Parses one media segment into cues.  Timing comes from the TFDT, TFHD
     * and TRUN boxes; the cue boxes themselves are read from the MDAT.
     * Cue times are converted to seconds with the timescale captured by
     * parseInit() and offset by time.periodStart.
     *
     * Throws INVALID_MP4_VTT if no init segment has been parsed, if the
     * segment has no MDAT, or if a cue box declares an impossible size.
     *
     * @override
     * @export
     */
    parseMedia(data, time) {
      if (!data.length) {
        return [];
      }

      if (!this.timescale_) {
        // Missing timescale for VTT content. We should have seen the init
        // segment.
        shaka.log.error('No init segment for MP4+VTT!');
        throw shaka.text.Mp4VttParser.invalidMp4VttError_();
      }

      const Mp4Parser = shaka.util.Mp4Parser;
      const Mp4BoxParsers = shaka.util.Mp4BoxParsers;

      // Size of the (size, type) header that precedes every box payload;
      // both fields are read as 32-bit integers below.
      const boxHeaderSize = 8;

      let baseTime = 0;
      /** @type {!Array<shaka.util.ParsedTRUNSample>} */
      let presentations = [];
      /** @type {?Uint8Array} */
      let rawPayload = null;
      /** @type {!Array<shaka.text.Cue>} */
      const cues = [];

      let sawTFDT = false;
      let sawTRUN = false;
      let sawMDAT = false;
      let defaultDuration = null;

      const parser = new Mp4Parser()
          .box('moof', Mp4Parser.children)
          .box('traf', Mp4Parser.children)
          .fullBox('tfdt', (box) => {
            sawTFDT = true;
            goog.asserts.assert(
                box.version == 0 || box.version == 1,
                'TFDT version can only be 0 or 1');
            const parsedTFDTBox = Mp4BoxParsers.parseTFDTInaccurate(
                box.reader, box.version);
            baseTime = parsedTFDTBox.baseMediaDecodeTime;
          })
          .fullBox('tfhd', (box) => {
            goog.asserts.assert(
                box.flags != null,
                'A TFHD box should have a valid flags value');
            const parsedTFHDBox = Mp4BoxParsers.parseTFHD(
                box.reader, box.flags);
            defaultDuration = parsedTFHDBox.defaultSampleDuration;
          })
          .fullBox('trun', (box) => {
            sawTRUN = true;
            goog.asserts.assert(
                box.version != null,
                'A TRUN box should have a valid version value');
            goog.asserts.assert(
                box.flags != null,
                'A TRUN box should have a valid flags value');
            const parsedTRUNBox = Mp4BoxParsers.parseTRUN(
                box.reader, box.version, box.flags);
            presentations = parsedTRUNBox.sampleData;
          })
          .box('mdat', Mp4Parser.allData((mdatBytes) => {
            goog.asserts.assert(
                !sawMDAT,
                'VTT cues in mp4 with multiple MDAT are not currently ' +
                'supported');
            sawMDAT = true;
            rawPayload = mdatBytes;
          }));
      parser.parse(data, /* partialOkay= */ false);

      if (!sawMDAT || !rawPayload) {
        // The MDAT is required: without it there is no payload to hand to
        // the reader below.  This also covers the case where none of the
        // expected boxes were present at all.
        throw shaka.text.Mp4VttParser.invalidMp4VttError_();
      }

      if (!sawTFDT || !sawTRUN) {
        // Not fatal: without a TFDT the base time stays 0, and without a
        // TRUN there are no samples to walk, so no cues are produced.
        shaka.log.warning(
            'MP4+VTT segment is missing a TFDT or TRUN box; ' +
            'cue timing may be wrong or no cues may be produced.');
      }

      let currentTime = baseTime;

      /** @type {!shaka.util.DataViewReader} */
      const reader = new shaka.util.DataViewReader(
          /** @type {!Uint8Array} */ (rawPayload),
          shaka.util.DataViewReader.Endianness.BIG_ENDIAN);

      for (const presentation of presentations) {
        // If one presentation corresponds to multiple payloads, it is
        // assumed that all of those payloads have the same start time and
        // duration.
        const duration = presentation.sampleDuration || defaultDuration;
        const startTime = presentation.sampleCompositionTimeOffset ?
            baseTime + presentation.sampleCompositionTimeOffset :
            currentTime;
        currentTime = startTime + (duration || 0);

        // Read samples until it adds up to the given size.
        let totalSize = 0;
        do {
          // Read the payload size and the box type.
          const payloadSize = reader.readUint32();
          const payloadType = reader.readUint32();

          if (payloadSize < boxHeaderSize) {
            // The declared size includes the header just read, so anything
            // smaller is malformed.  Left unchecked, it would pass a
            // negative count to reader.skip() and a size of 0 would never
            // advance totalSize, so this loop could fail to terminate.
            shaka.log.error(
                'MP4+VTT cue box has an invalid size of ' + payloadSize);
            throw shaka.text.Mp4VttParser.invalidMp4VttError_();
          }

          totalSize += payloadSize;
          const payloadName = Mp4Parser.typeToString(payloadType);
          const bodySize = payloadSize - boxHeaderSize;

          // Read the data payload.
          /** @type {Uint8Array} */
          let payload = null;
          if (payloadName == 'vttc') {
            if (bodySize > 0) {
              payload = reader.readBytes(bodySize);
            }
          } else if (payloadName == 'vtte') {
            // It's a vtte, which is a vtt cue that is empty. Ignore any
            // data that does exist.
            reader.skip(bodySize);
          } else {
            shaka.log.error('Unknown box ' + payloadName + '! Skipping!');
            reader.skip(bodySize);
          }

          if (duration) {
            if (payload) {
              goog.asserts.assert(
                  this.timescale_ != null, 'Timescale should not be null!');
              const cue = shaka.text.Mp4VttParser.parseVTTC_(
                  payload,
                  time.periodStart + startTime / this.timescale_,
                  time.periodStart + currentTime / this.timescale_);
              cues.push(cue);
            }
          } else {
            shaka.log.error(
                'WVTT sample duration unknown, and no default found!');
          }

          goog.asserts.assert(
              !presentation.sampleSize ||
              totalSize <= presentation.sampleSize,
              'The samples do not fit evenly into the sample sizes given ' +
              'in the TRUN box!');

          // If no sampleSize was specified, it's assumed that this
          // presentation corresponds to only a single cue.
        } while (presentation.sampleSize &&
                 (totalSize < presentation.sampleSize));
      }

      goog.asserts.assert(
          !reader.hasMoreData(),
          'MDAT which contain VTT cues and non-VTT data are not currently ' +
          'supported!');

      // parseVTTC_() returns null for a vttc box with no payload text, so
      // drop those entries before returning.
      return /** @type {!Array<!shaka.text.Cue>} */ (
        cues.filter(shaka.util.Functional.isNotNull));
    }

    /**
     * Builds the error this parser throws for every malformed-input
     * condition.
     *
     * @return {!shaka.util.Error}
     * @private
     */
    static invalidMp4VttError_() {
      return new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_MP4_VTT);
    }

    /**
     * Parses a vttc box into a cue.  The box may contain a "payl" box (cue
     * text), an "iden" box (cue id) and an "sttg" box (cue settings), each
     * decoded as UTF-8.
     *
     * @param {!Uint8Array} data
     * @param {number} startTime
     * @param {number} endTime
     * @return {shaka.text.Cue} The cue, or null if there was no cue text.
     * @private
     */
    static parseVTTC_(data, startTime, endTime) {
      const Mp4Parser = shaka.util.Mp4Parser;
      const StringUtils = shaka.util.StringUtils;

      let payload;
      let id;
      let settings;

      new Mp4Parser()
          .box('payl', Mp4Parser.allData((bytes) => {
            payload = StringUtils.fromUTF8(bytes);
          }))
          .box('iden', Mp4Parser.allData((bytes) => {
            id = StringUtils.fromUTF8(bytes);
          }))
          .box('sttg', Mp4Parser.allData((bytes) => {
            settings = StringUtils.fromUTF8(bytes);
          }))
          .parse(data);

      if (!payload) {
        return null;
      }
      return shaka.text.Mp4VttParser.assembleCue_(
          payload, id, settings, startTime, endTime);
    }

    /**
     * Take the individual components that make a cue and create a vttc cue.
     * Each whitespace-separated word of the settings string is applied as a
     * VTT cue setting; words that are not valid settings are logged and
     * ignored.
     *
     * @param {string} payload
     * @param {?string} id
     * @param {?string} settings
     * @param {number} startTime
     * @param {number} endTime
     * @return {!shaka.text.Cue}
     * @private
     */
    static assembleCue_(payload, id, settings, startTime, endTime) {
      const cue = new shaka.text.Cue(startTime, endTime, payload);
      shaka.text.Cue.parseCuePayload(cue);

      if (id) {
        cue.id = id;
      }

      if (settings) {
        const parser = new shaka.util.TextParser(settings);

        let word = parser.readWord();

        while (word) {
          // TODO: Check WebVTTConfigurationBox for region info.
          if (!shaka.text.VttTextParser.parseCueSetting(
              cue, word, /* VTTRegions= */[])) {
            shaka.log.warning(
                'VTT parser encountered an invalid VTT setting: ', word,
                ' The setting will be ignored.');
          }

          parser.skipWhitespace();
          word = parser.readWord();
        }
      }

      return cue;
    }
  };
  // Register a factory for this parser with the text engine under the
  // MP4-wrapped WebVTT MIME type.
  shaka.text.TextEngine.registerParser(
      'application/mp4; codecs="wvtt"',
      () => {
        return new shaka.text.Mp4VttParser();
      });