Source: lib/text/vtt_text_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.text.VttTextParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.log');
  9. goog.require('shaka.media.ManifestParser');
  10. goog.require('shaka.text.Cue');
  11. goog.require('shaka.text.CueRegion');
  12. goog.require('shaka.text.TextEngine');
  13. goog.require('shaka.util.Error');
  14. goog.require('shaka.util.StringUtils');
  15. goog.require('shaka.util.TextParser');
  16. /**
  17. * @implements {shaka.extern.TextParser}
  18. * @export
  19. */
  20. shaka.text.VttTextParser = class {
  21. /** Constructs a VTT parser. */
  22. constructor() {
  23. /** @private {boolean} */
  24. this.sequenceMode_ = false;
  25. /** @private {string} */
  26. this.manifestType_ = shaka.media.ManifestParser.UNKNOWN;
  27. }
  28. /**
  29. * @override
  30. * @export
  31. */
  32. parseInit(data) {
  33. goog.asserts.assert(false, 'VTT does not have init segments');
  34. }
  35. /**
  36. * @override
  37. * @export
  38. */
  39. setSequenceMode(sequenceMode) {
  40. this.sequenceMode_ = sequenceMode;
  41. }
  42. /**
  43. * @override
  44. * @export
  45. */
  46. setManifestType(manifestType) {
  47. this.manifestType_ = manifestType;
  48. }
  49. /**
  50. * @override
  51. * @export
  52. */
  53. parseMedia(data, time) {
  54. const VttTextParser = shaka.text.VttTextParser;
  55. // Get the input as a string. Normalize newlines to \n.
  56. let str = shaka.util.StringUtils.fromUTF8(data);
  57. str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n');
  58. const blocks = str.split(/\n{2,}/m);
  59. if (!/^WEBVTT($|[ \t\n])/m.test(blocks[0])) {
  60. throw new shaka.util.Error(
  61. shaka.util.Error.Severity.CRITICAL,
  62. shaka.util.Error.Category.TEXT,
  63. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  64. }
  65. // Depending on "segmentRelativeVttTiming" configuration,
  66. // "vttOffset" will correspond to either "periodStart" (default)
  67. // or "segmentStart", for segmented VTT where timings are relative
  68. // to the beginning of each segment.
  69. // NOTE: "periodStart" is the timestamp offset applied via TextEngine.
  70. // It is no longer closely tied to periods, but the name stuck around.
  71. // NOTE: This offset and the flag choosing its meaning have no effect on
  72. // HLS content, which should use X-TIMESTAMP-MAP and periodStart instead.
  73. let offset = time.vttOffset;
  74. if (this.manifestType_ == shaka.media.ManifestParser.HLS) {
  75. // Only use 'X-TIMESTAMP-MAP' with HLS.
  76. if (blocks[0].includes('X-TIMESTAMP-MAP')) {
  77. offset = this.computeHlsOffset_(blocks[0], time);
  78. } else if (time.periodStart && time.vttOffset == time.periodStart) {
  79. // In the case where X-TIMESTAMP-MAP is not used and it is HLS, we
  80. // should not use offset unless segment-relative times are used.
  81. offset = 0;
  82. }
  83. }
  84. // Parse VTT regions.
  85. /* !Array<!shaka.text.CueRegion> */
  86. const regions = [];
  87. for (const line of blocks[0].split('\n')) {
  88. if (/^Region:/.test(line)) {
  89. const region = VttTextParser.parseRegion_(line);
  90. regions.push(region);
  91. }
  92. }
  93. /** @type {!Map<string, !shaka.text.Cue>} */
  94. const styles = new Map();
  95. shaka.text.Cue.addDefaultTextColor(styles);
  96. // Parse cues.
  97. const ret = [];
  98. for (const block of blocks.slice(1)) {
  99. const lines = block.split('\n');
  100. VttTextParser.parseStyle_(lines, styles);
  101. const cue = VttTextParser.parseCue_(lines, offset, regions, styles);
  102. if (cue) {
  103. ret.push(cue);
  104. }
  105. }
  106. return ret;
  107. }
  108. /**
  109. * @param {string} headerBlock Contains X-TIMESTAMP-MAP.
  110. * @param {shaka.extern.TextParser.TimeContext} time
  111. * @return {number}
  112. * @private
  113. */
  114. computeHlsOffset_(headerBlock, time) {
  115. // https://bit.ly/2K92l7y
  116. // The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
  117. // the rest of the media.
  118. // The header format is 'X-TIMESTAMP-MAP=MPEGTS:n,LOCAL:m'
  119. // (the attributes can go in any order)
  120. // where n is MPEG-2 time and m is cue time it maps to.
  121. // For example 'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'
  122. // means an offset of 10 seconds
  123. // 900000/MPEG_TIMESCALE - cue time.
  124. const cueTimeMatch = headerBlock.match(
  125. /LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))/m);
  126. const mpegTimeMatch = headerBlock.match(/MPEGTS:(\d+)/m);
  127. if (!cueTimeMatch || !mpegTimeMatch) {
  128. throw new shaka.util.Error(
  129. shaka.util.Error.Severity.CRITICAL,
  130. shaka.util.Error.Category.TEXT,
  131. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  132. }
  133. const cueTime = shaka.util.TextParser.parseTime(cueTimeMatch[1]);
  134. if (cueTime == null) {
  135. throw new shaka.util.Error(
  136. shaka.util.Error.Severity.CRITICAL,
  137. shaka.util.Error.Category.TEXT,
  138. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  139. }
  140. let mpegTime = Number(mpegTimeMatch[1]);
  141. const mpegTimescale = shaka.text.VttTextParser.MPEG_TIMESCALE_;
  142. const rolloverSeconds =
  143. shaka.text.VttTextParser.TS_ROLLOVER_ / mpegTimescale;
  144. let segmentStart = time.segmentStart - time.periodStart;
  145. while (segmentStart >= rolloverSeconds) {
  146. segmentStart -= rolloverSeconds;
  147. mpegTime += shaka.text.VttTextParser.TS_ROLLOVER_;
  148. }
  149. return time.periodStart + mpegTime / mpegTimescale - cueTime;
  150. }
  151. /**
  152. * Parses a string into a Region object.
  153. *
  154. * @param {string} text
  155. * @return {!shaka.text.CueRegion}
  156. * @private
  157. */
  158. static parseRegion_(text) {
  159. const VttTextParser = shaka.text.VttTextParser;
  160. const parser = new shaka.util.TextParser(text);
  161. // The region string looks like this:
  162. // Region: id=fred width=50% lines=3 regionanchor=0%,100%
  163. // viewportanchor=10%,90% scroll=up
  164. const region = new shaka.text.CueRegion();
  165. // Skip 'Region:'
  166. parser.readWord();
  167. parser.skipWhitespace();
  168. let word = parser.readWord();
  169. while (word) {
  170. if (!VttTextParser.parseRegionSetting_(region, word)) {
  171. shaka.log.warning(
  172. 'VTT parser encountered an invalid VTTRegion setting: ', word,
  173. ' The setting will be ignored.');
  174. }
  175. parser.skipWhitespace();
  176. word = parser.readWord();
  177. }
  178. return region;
  179. }
  180. /**
  181. * Parses a style block into a Cue object.
  182. *
  183. * @param {!Array<string>} text
  184. * @param {!Map<string, !shaka.text.Cue>} styles
  185. * @private
  186. */
  187. static parseStyle_(text, styles) {
  188. // Skip empty blocks.
  189. if (text.length == 1 && !text[0]) {
  190. return;
  191. }
  192. // Skip comment blocks.
  193. if (/^NOTE($|[ \t])/.test(text[0])) {
  194. return;
  195. }
  196. // Only style block are allowed.
  197. if (text[0] != 'STYLE') {
  198. return;
  199. }
  200. /** @type {!Array<!Array<string>>} */
  201. const styleBlocks = [];
  202. let lastBlockIndex = -1;
  203. for (let i = 1; i < text.length; i++) {
  204. if (text[i].includes('::cue')) {
  205. styleBlocks.push([]);
  206. lastBlockIndex = styleBlocks.length - 1;
  207. }
  208. if (lastBlockIndex == -1) {
  209. continue;
  210. }
  211. styleBlocks[lastBlockIndex].push(text[i]);
  212. if (text[i].includes('}')) {
  213. lastBlockIndex = -1;
  214. }
  215. }
  216. for (const styleBlock of styleBlocks) {
  217. let styleSelector = 'global';
  218. // Look for what is within parentheses. For example:
  219. // <code>:: cue (b) {</code>, what we are looking for is <code>b</code>
  220. const selector = styleBlock[0].match(/\((.*)\)/);
  221. if (selector) {
  222. styleSelector = selector.pop();
  223. }
  224. // We start at 1 to avoid '::cue' and end earlier to avoid '}'
  225. let propertyLines = styleBlock.slice(1, -1);
  226. if (styleBlock[0].includes('}')) {
  227. const payload = /\{(.*?)\}/.exec(styleBlock[0]);
  228. if (payload) {
  229. propertyLines = payload[1].split(';');
  230. }
  231. }
  232. // Continue styles over multiple selectors if necessary.
  233. // For example,
  234. // ::cue(b) { background: white; } ::cue(b) { color: blue; }
  235. // should set both the background and foreground of bold tags.
  236. let cue = styles.get(styleSelector);
  237. if (!cue) {
  238. cue = new shaka.text.Cue(0, 0, '');
  239. }
  240. let validStyle = false;
  241. for (let i = 0; i < propertyLines.length; i++) {
  242. // We look for CSS properties. As a general rule they are separated by
  243. // <code>:</code>. Eg: <code>color: red;</code>
  244. const lineParts = /^\s*([^:]+):\s*(.*)/.exec(propertyLines[i]);
  245. if (lineParts) {
  246. const name = lineParts[1].trim();
  247. const value = lineParts[2].trim().replace(';', '');
  248. switch (name) {
  249. case 'background-color':
  250. case 'background':
  251. validStyle = true;
  252. cue.backgroundColor = value;
  253. break;
  254. case 'color':
  255. validStyle = true;
  256. cue.color = value;
  257. break;
  258. case 'font-family':
  259. validStyle = true;
  260. cue.fontFamily = value;
  261. break;
  262. case 'font-size':
  263. validStyle = true;
  264. cue.fontSize = value;
  265. break;
  266. case 'font-weight':
  267. if (parseInt(value, 10) >= 700 || value == 'bold') {
  268. validStyle = true;
  269. cue.fontWeight = shaka.text.Cue.fontWeight.BOLD;
  270. }
  271. break;
  272. case 'font-style':
  273. switch (value) {
  274. case 'normal':
  275. validStyle = true;
  276. cue.fontStyle = shaka.text.Cue.fontStyle.NORMAL;
  277. break;
  278. case 'italic':
  279. validStyle = true;
  280. cue.fontStyle = shaka.text.Cue.fontStyle.ITALIC;
  281. break;
  282. case 'oblique':
  283. validStyle = true;
  284. cue.fontStyle = shaka.text.Cue.fontStyle.OBLIQUE;
  285. break;
  286. }
  287. break;
  288. case 'opacity':
  289. validStyle = true;
  290. cue.opacity = parseFloat(value);
  291. break;
  292. case 'text-combine-upright':
  293. validStyle = true;
  294. cue.textCombineUpright = value;
  295. break;
  296. case 'text-shadow':
  297. validStyle = true;
  298. cue.textShadow = value;
  299. break;
  300. case 'white-space':
  301. validStyle = true;
  302. cue.wrapLine = value != 'noWrap';
  303. break;
  304. default:
  305. shaka.log.warning('VTT parser encountered an unsupported style: ',
  306. lineParts);
  307. break;
  308. }
  309. }
  310. }
  311. if (validStyle) {
  312. styles.set(styleSelector, cue);
  313. }
  314. }
  315. }
  316. /**
  317. * Parses a text block into a Cue object.
  318. *
  319. * @param {!Array<string>} text
  320. * @param {number} timeOffset
  321. * @param {!Array<!shaka.text.CueRegion>} regions
  322. * @param {!Map<string, !shaka.text.Cue>} styles
  323. * @return {shaka.text.Cue}
  324. * @private
  325. */
  326. static parseCue_(text, timeOffset, regions, styles) {
  327. const VttTextParser = shaka.text.VttTextParser;
  328. // Skip empty blocks.
  329. if (text.length == 1 && !text[0]) {
  330. return null;
  331. }
  332. // Skip comment blocks.
  333. if (/^NOTE($|[ \t])/.test(text[0])) {
  334. return null;
  335. }
  336. // Skip style and region blocks.
  337. if (text[0] == 'STYLE' || text[0] == 'REGION') {
  338. return null;
  339. }
  340. let id = null;
  341. if (!text[0].includes('-->')) {
  342. id = text[0];
  343. text.splice(0, 1);
  344. }
  345. // Parse the times.
  346. const parser = new shaka.util.TextParser(text[0]);
  347. let start = parser.parseTime();
  348. const expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
  349. let end = parser.parseTime();
  350. if (start == null || expect == null || end == null) {
  351. shaka.log.alwaysWarn(
  352. 'Failed to parse VTT time code. Cue skipped:', id, text);
  353. return null;
  354. }
  355. start += timeOffset;
  356. end += timeOffset;
  357. // Get the payload.
  358. const payload = text.slice(1).join('\n').trim();
  359. let cue = null;
  360. if (styles.has('global')) {
  361. cue = styles.get('global').clone();
  362. cue.startTime = start;
  363. cue.endTime = end;
  364. cue.payload = payload;
  365. } else {
  366. cue = new shaka.text.Cue(start, end, payload);
  367. }
  368. // Parse optional settings.
  369. parser.skipWhitespace();
  370. let word = parser.readWord();
  371. while (word) {
  372. if (!VttTextParser.parseCueSetting(cue, word, regions)) {
  373. shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
  374. word,
  375. ' The setting will be ignored.');
  376. }
  377. parser.skipWhitespace();
  378. word = parser.readWord();
  379. }
  380. shaka.text.Cue.parseCuePayload(cue, styles);
  381. if (id != null) {
  382. cue.id = id;
  383. }
  384. return cue;
  385. }
  386. /**
  387. * Parses a WebVTT setting from the given word.
  388. *
  389. * @param {!shaka.text.Cue} cue
  390. * @param {string} word
  391. * @param {!Array<!shaka.text.CueRegion>} regions
  392. * @return {boolean} True on success.
  393. */
  394. static parseCueSetting(cue, word, regions) {
  395. const VttTextParser = shaka.text.VttTextParser;
  396. let results = null;
  397. if ((results = /^align:(start|middle|center|end|left|right)$/.exec(word))) {
  398. VttTextParser.setTextAlign_(cue, results[1]);
  399. } else if ((results = /^vertical:(lr|rl)$/.exec(word))) {
  400. VttTextParser.setVerticalWritingMode_(cue, results[1]);
  401. } else if ((results = /^size:([\d.]+)%$/.exec(word))) {
  402. cue.size = Number(results[1]);
  403. } else if ((results =
  404. // eslint-disable-next-line max-len
  405. /^position:([\d.]+)%(?:,(line-left|line-right|middle|center|start|end|auto))?$/
  406. .exec(word))) {
  407. cue.position = Number(results[1]);
  408. if (results[2]) {
  409. VttTextParser.setPositionAlign_(cue, results[2]);
  410. }
  411. } else if ((results = /^region:(.*)$/.exec(word))) {
  412. const region = VttTextParser.getRegionById_(regions, results[1]);
  413. if (region) {
  414. cue.region = region;
  415. }
  416. } else {
  417. return VttTextParser.parsedLineValueAndInterpretation_(cue, word);
  418. }
  419. return true;
  420. }
  421. /**
  422. *
  423. * @param {!Array<!shaka.text.CueRegion>} regions
  424. * @param {string} id
  425. * @return {?shaka.text.CueRegion}
  426. * @private
  427. */
  428. static getRegionById_(regions, id) {
  429. const regionsWithId = regions.filter((region) => {
  430. return region.id == id;
  431. });
  432. if (!regionsWithId.length) {
  433. shaka.log.warning('VTT parser could not find a region with id: ',
  434. id,
  435. ' The region will be ignored.');
  436. return null;
  437. }
  438. goog.asserts.assert(regionsWithId.length == 1,
  439. 'VTTRegion ids should be unique!');
  440. return regionsWithId[0];
  441. }
  442. /**
  443. * Parses a WebVTTRegion setting from the given word.
  444. *
  445. * @param {!shaka.text.CueRegion} region
  446. * @param {string} word
  447. * @return {boolean} True on success.
  448. * @private
  449. */
  450. static parseRegionSetting_(region, word) {
  451. let results = null;
  452. if ((results = /^id=(.*)$/.exec(word))) {
  453. region.id = results[1];
  454. } else if ((results = /^width=(\d{1,2}|100)%$/.exec(word))) {
  455. region.width = Number(results[1]);
  456. } else if ((results = /^lines=(\d+)$/.exec(word))) {
  457. region.height = Number(results[1]);
  458. region.heightUnits = shaka.text.CueRegion.units.LINES;
  459. } else if ((results = /^regionanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  460. .exec(word))) {
  461. region.regionAnchorX = Number(results[1]);
  462. region.regionAnchorY = Number(results[2]);
  463. } else if ((results = /^viewportanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  464. .exec(word))) {
  465. region.viewportAnchorX = Number(results[1]);
  466. region.viewportAnchorY = Number(results[2]);
  467. } else if ((results = /^scroll=up$/.exec(word))) {
  468. region.scroll = shaka.text.CueRegion.scrollMode.UP;
  469. } else {
  470. return false;
  471. }
  472. return true;
  473. }
  474. /**
  475. * @param {!shaka.text.Cue} cue
  476. * @param {string} align
  477. * @private
  478. */
  479. static setTextAlign_(cue, align) {
  480. const Cue = shaka.text.Cue;
  481. if (align == 'middle') {
  482. cue.textAlign = Cue.textAlign.CENTER;
  483. } else {
  484. goog.asserts.assert(align.toUpperCase() in Cue.textAlign,
  485. align.toUpperCase() +
  486. ' Should be in Cue.textAlign values!');
  487. cue.textAlign = Cue.textAlign[align.toUpperCase()];
  488. }
  489. }
  490. /**
  491. * @param {!shaka.text.Cue} cue
  492. * @param {string} align
  493. * @private
  494. */
  495. static setPositionAlign_(cue, align) {
  496. const Cue = shaka.text.Cue;
  497. if (align == 'line-left' || align == 'start') {
  498. cue.positionAlign = Cue.positionAlign.LEFT;
  499. } else if (align == 'line-right' || align == 'end') {
  500. cue.positionAlign = Cue.positionAlign.RIGHT;
  501. } else if (align == 'center' || align == 'middle') {
  502. cue.positionAlign = Cue.positionAlign.CENTER;
  503. } else {
  504. cue.positionAlign = Cue.positionAlign.AUTO;
  505. }
  506. }
  507. /**
  508. * @param {!shaka.text.Cue} cue
  509. * @param {string} value
  510. * @private
  511. */
  512. static setVerticalWritingMode_(cue, value) {
  513. const Cue = shaka.text.Cue;
  514. if (value == 'lr') {
  515. cue.writingMode = Cue.writingMode.VERTICAL_LEFT_TO_RIGHT;
  516. } else {
  517. cue.writingMode = Cue.writingMode.VERTICAL_RIGHT_TO_LEFT;
  518. }
  519. }
  520. /**
  521. * @param {!shaka.text.Cue} cue
  522. * @param {string} word
  523. * @return {boolean}
  524. * @private
  525. */
  526. static parsedLineValueAndInterpretation_(cue, word) {
  527. const Cue = shaka.text.Cue;
  528. let results = null;
  529. if ((results = /^line:([\d.]+)%(?:,(start|end|center))?$/.exec(word))) {
  530. cue.lineInterpretation = Cue.lineInterpretation.PERCENTAGE;
  531. cue.line = Number(results[1]);
  532. if (results[2]) {
  533. goog.asserts.assert(
  534. results[2].toUpperCase() in Cue.lineAlign,
  535. results[2].toUpperCase() + ' Should be in Cue.lineAlign values!');
  536. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  537. }
  538. } else if ((results =
  539. /^line:(-?\d+)(?:,(start|end|center))?$/.exec(word))) {
  540. cue.lineInterpretation = Cue.lineInterpretation.LINE_NUMBER;
  541. cue.line = Number(results[1]);
  542. if (results[2]) {
  543. goog.asserts.assert(
  544. results[2].toUpperCase() in Cue.lineAlign,
  545. results[2].toUpperCase() + ' Should be in Cue.lineAlign values!');
  546. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  547. }
  548. } else {
  549. return false;
  550. }
  551. return true;
  552. }
  553. };
  /**
   * Divisor that converts the MPEGTS value of an X-TIMESTAMP-MAP header
   * into seconds.
   * @const {number}
   * @private
   */
  shaka.text.VttTextParser.MPEG_TIMESCALE_ = 90000;

  /**
   * At this value, timestamps roll over in TS content.
   * Equal to 2^33 (0x200000000).
   * @const {number}
   * @private
   */
  shaka.text.VttTextParser.TS_ROLLOVER_ = Math.pow(2, 33);
  // Register a VTT parser factory for every MIME type this parser handles.
  for (const mimeType of [
    'text/vtt',
    'text/vtt; codecs="vtt"',
    'text/vtt; codecs="wvtt"',
  ]) {
    shaka.text.TextEngine.registerParser(
        mimeType, () => new shaka.text.VttTextParser());
  }