//
// MPEG Closed Caption Extractor (v2.2)
//
// Version History:
// 1.0: Initial release
// 1.1: Handle special case of DVD format with ff xx xx ff yy yy pattern
// 1.2: Swapped identification of ReplayTV 4000 and 5000 formats
//      (no visible effect),
//      Swapped field output for both ReplayTV formats
// 1.3: improved faulty DVD format handling (thanks to Keith Hui)
// 1.4: Added ATSC format generated by Pinnacle MediaStream
// 2.0: Re-wrote using General Parser sample code so that caption code
//      is aware of picture type (required by ATSC format),
//      Added DSS-SD captions (they were included in video.bdl)
// 2.1: Extraction of CC from ATSC streams fixed (thanks to Ken Schultz)
// 2.2: Fixed the omission of some CC bytes from ATSC streams (by Ken Schultz)
//
// Overview:
//   The script walks the input by start code (sequence -> GOP -> picture),
//   hands every user-data section to user_data(), and emits the caption
//   words belonging to the caption field the user selected (ccPick).
//   Recognized user-data layouts: DVD, ATSC, ReplayTV 4000/5000,
//   Dish Network, and a DSS-SD fallback.

// Start codes matched against the value returned by search_word().
// Note that START_CODE and PICTURE_START_CODE share the same value.
enum
{
  START_CODE = 0x00000100,
  PICTURE_START_CODE = 0x00000100,
  USER_DATA_START_CODE = 0x000001b2,
  SEQUENCE_START_CODE = 0x000001b3,
  SEQUENCE_ERROR_START_CODE = 0x000001b4,
  EXTENSION_START_CODE = 0x000001b5,
  SEQUENCE_END_CODE = 0x000001b7,
  GROUP_START_CODE = 0x000001b8
};

// Signatures tested at the start of a user-data section to identify the
// caption layout. The first two are compared over 32 bits, the rest over 16.
enum
{
  DVD_CLOSED_CAPTION = 0x434301f8,
  ATSC_CLOSED_CAPTION = 0x47413934,
  REPLAYTV_4000_CLOSED_CAPTION = 0xbb02,
  REPLAYTV_5000_CLOSED_CAPTION = 0x9902,
  DISH_NETWORK_CLOSED_CAPTION = 0x0502
};

// Capacity of cc_buffer (the ATSC cc_count field is 5 bits wide, so at most
// 31 entries are ever stored per picture).
enum { MAX_CLOSED_CAPTION_DATA_PER_PICTURE = 32 };

// Incremented in sequence() and reset in picture_header().
// NOTE(review): never read anywhere in this file as seen - possibly vestigial.
int sequence_counter;

// Marker byte preceding each DVD caption packet, indexed by ccPick.
int DVD_FIELD[2]; // EIA 608 format captions (analog TV)
DVD_FIELD[0] = 0xff;
DVD_FIELD[1] = 0xfe;
// NOTE(review): HDCC_FIELD is initialized but not referenced elsewhere in
// this file as seen.
int HDCC_FIELD[2]; // EIA 708 format captions (HDTV)
HDCC_FIELD[0] = 0xfc;
HDCC_FIELD[1] = 0xfd;

// Dish Network packet-pattern codes, plus the byte value that user_data()
// treats as "repeat the previous 2-byte caption".
enum { DN_2_BYTE = 0x02, DN_4_BYTE = 0x04, DN_BUFFERED = 0x05 };
int REPEAT = 0x04;

// Ask which caption field to extract. The prompt is 1-based (initial value 1)
// and ccPick is the 0-based equivalent: 0 = Field 1, 1 = Field 2.
// The prompt repeats until the answer maps to 0 or 1.
int ccPick = -1;
while ((ccPick != 0) && (ccPick !=1)) {
  input "Closed Captioning Field 1 or 2" int f = 1;
  ccPick = --f;
}

// picture_coding_type of the most recently parsed picture header
// (3 is treated as "B picture" throughout user_data()).
int current_picture_coding_type;

// ATSC transmits captions with every frame, so I & P frame data has to be held
// until after B frame data or it would end up out of order
// (-1 marks an empty slot; the DSS-SD branch also uses slot 0.)
int cc_buffer[MAX_CLOSED_CAPTION_DATA_PER_PICTURE];
// Dish Network transmits captions with every frame, so B frame data has to be held
// or it would end up out of order
// (-1 initially; written only by the DN_BUFFERED branch of user_data().)
int DN_buffer = -1;

// Entry point.
// Emits one 32-bit word 0xffffffff first (presumably a header/marker expected
// by whatever consumes the output - TODO confirm), marks every cc_buffer slot
// empty, then loops forever dispatching on the next start code.
// search_word() appears to return the start code without consuming it, which
// is why each case reads 32 bits into remove_start_code - TODO confirm
// against the parser documentation.
quiet void main(void) {
  int dummy;
  dummy = 0xffffffff;
  output dummy:32;

  int i;
  for (i=0; i < MAX_CLOSED_CAPTION_DATA_PER_PICTURE; i++) {
    cc_buffer[i] = -1;
  }

  sequence_counter = 0;
  while (1) {
    switch(search_word(START_CODE, 0xff)) {
      case SEQUENCE_START_CODE:
        {field int remove_start_code:32;}
        sequence();
        continue;
      case SEQUENCE_END_CODE:
        {field int remove_start_code:32;}
        sequence_end();
        continue;
      case SEQUENCE_ERROR_START_CODE:
        {field int remove_start_code:32;}
        sequence_error();
        continue;
      default:
        // Any other start code at this level is consumed and ignored.
        {field int remove_start_code:32;}
        continue;
    }
  }
}

// Handles everything following a sequence start code: consumes extension
// start codes, passes user data to user_data(SEQUENCE_START_CODE), and
// descends into GOP and picture headers. Returns, leaving the start code
// unconsumed, on any start code it does not handle.
void sequence(void) {
  sequence_counter++;
  while (1) {
    switch(search_word(START_CODE, 0xff)) {
      case EXTENSION_START_CODE:
        {field int remove_start_code:32;}
        continue;
      case USER_DATA_START_CODE:
        {field int remove_start_code:32;}
        user_data(SEQUENCE_START_CODE);
        continue;
      case GROUP_START_CODE:
        {field int remove_start_code:32;}
        group_of_pictures_header();
        continue;
      case PICTURE_START_CODE:
        {field int remove_start_code:32;}
        picture_header();
        continue;
      default:
        return;
    }
  }
}

// Intentionally empty: nothing is extracted at a sequence end code.
void sequence_end(void) {
}

// Intentionally empty: nothing is extracted at a sequence error code.
void sequence_error(void) {
}

// Handles everything following a GOP start code: consumes extension start
// codes, passes user data to user_data(GROUP_START_CODE), and descends into
// picture headers. Returns, leaving the start code unconsumed, on any other
// start code.
void group_of_pictures_header(void) {
  while(1) {
    switch(search_word(START_CODE, 0xff)) {
      case EXTENSION_START_CODE:
        {field int remove_start_code:32;}
        continue;
      case USER_DATA_START_CODE:
        {field int remove_start_code:32;}
        user_data(GROUP_START_CODE);
        continue;
      case PICTURE_START_CODE:
        {field int remove_start_code:32;}
        picture_header();
        continue;
      default:
        return;
    }
  }
}

// Parses one user-data section (its start code has already been consumed by
// the caller) and outputs the caption words for the field selected by ccPick.
//
//   start_code: the level the user data was found at - SEQUENCE_START_CODE,
//               GROUP_START_CODE or PICTURE_START_CODE.
//
// Reads: ccPick, current_picture_coding_type, DVD_FIELD, REPEAT.
// Reads and writes: cc_buffer, DN_buffer.
// Sequence-level user data matches no case and is skipped byte by byte by
// the loop at the end of the function.
void user_data(int start_code) {
  int cc_data, cc_count, skip;
  switch(start_code) {
    // no sequence-based caption systems

    // GOP-based caption systems: DVD
    case GROUP_START_CODE:
      if (looking_at_bits(DVD_CLOSED_CAPTION, 32)) {
        field "0x%08x" int DVD_identifier:32;
        field "%d" cc_count:8;
        // patterns: 0x00 = Field 2 then Field 1, 0x80 = Field 1 then Field 2
        int pattern = cc_count & 0x80;
        // Index (0 or 1) within each pair of packets that carries the
        // wanted field, derived from the ordering flag above.
        int desired_packet = 0;
        if (ccPick == 0 && pattern == 0x00) {
          desired_packet = 1;
        }
        if (ccPick == 1 && pattern == 0x80) {
          desired_packet = 1;
        }
        // Keep bits 1..4 of the header byte and halve them: the number of
        // packet pairs to read.
        cc_count &= 0x1e;
        cc_count /= 2;
        // printf ("\n%d: ", cc_count);
        int i, j;
        for (i=0; i < cc_count; i++) {
          for (j=0; j < 2; j++) {
            // Each packet is a 1-byte field marker followed by 16 bits of
            // data. The data is emitted only when the marker matches the
            // selected field AND the packet sits at the expected position.
            field int caption_field:8;
            if ((caption_field != DVD_FIELD[ccPick]) || (j != desired_packet)) {
              field skip:16;
              // printf ("%x/%x ", caption_field, skip);
            } else {
              field output cc_data:16;
              // printf ("%x/%x ", caption_field, cc_data);
            }
          }
        }
        return;
      }
      // NOTE(review): there is no break here, so GOP-level user data that
      // does not carry the DVD signature falls through into the picture-level
      // handling below (including the DSS-SD catch-all). Confirm whether this
      // is intended or whether it should reach the skip loop at the end.

    // frame-based caption systems: DSS-SD, ATSC, ReplayTV (4000 & 5000 series), DishNetwork
    case PICTURE_START_CODE:
      if(looking_at_bits(ATSC_CLOSED_CAPTION, 32)) {
        field "0x%08x" int ATSC_identifier:32;
        field "%d" int user_data_type_code:8;
        if(user_data_type_code == 0x03) {
          field "%d" int process_em_data_flag:1;
          field "%d" int process_cc_data_flag:1;
          field "%d" int additional_data_flag:1;
          field "%d" cc_count:5;
          field "%d" int em_data:8;
          int i, j;
          if(current_picture_coding_type != 3) {
            // I or P picture
            // Flush the words held back from the previous I/P picture,
            // stopping at the first empty slot, and mark the slots empty.
            for(i=0; i < MAX_CLOSED_CAPTION_DATA_PER_PICTURE; i++) {
              if(cc_buffer[i] == -1) {
                break;
              }
              output cc_buffer[i]:16;
              cc_buffer[i] = -1;
            }
          }
          j=0;
          for(i=0; i < cc_count; i++) {
            field "0x%02x" int marker_bits:5;
            field "%d" int cc_valid:1;
            field "%d" int cc_type:2;
            field "0x%04x" cc_data:16;
            // Only valid entries whose cc_type equals the selected field
            // index are kept.
            if(cc_valid && ccPick == cc_type) {
              if(current_picture_coding_type == 3) {
                // B picture
                output cc_data:16;
              } else {
                // I or P picture
                // Held in cc_buffer until the next I/P picture flushes it.
                cc_buffer[j++] = cc_data;
              }
            }
          }
          field "0x%02x" int marker_bits:8;
          if(additional_data_flag) {
            // Consume trailing bytes up to the next start-code prefix.
            while(!looking_at_bits(0x000001, 24)) {
              field "0x%02x" int additional_usr_data:8;
            }
          }
        } else {
          // user_data_type_code != 0x03
          // Not caption data: consume bytes up to the next start-code prefix.
          while(!looking_at_bits(0x000001, 24)) {
            field "0x%02x" int byte_data:8;
          }
        }
        return;
      } else if(looking_at_bits(REPLAYTV_4000_CLOSED_CAPTION, 16)) {
        // Layout read here: id(16), Field 2 data(16), separator(16),
        // Field 1 data(16).
        field "0x%04x" int ReplayTV4000_identifier:16;
        // Field 2 data
        if (ccPick) {
          field output cc_data:16;
        } else {
          field skip:16;
        }
        field skip:16; // 0xcc02
        // Field 1 data
        if (!ccPick) {
          field output cc_data:16;
        } else {
          field skip:16;
        }
        return;
      } else if(looking_at_bits(REPLAYTV_5000_CLOSED_CAPTION, 16)) {
        // Layout read here: id(16), Field 1 data(16), separator(16),
        // Field 2 data(16) - the reverse field order of the 4000 branch.
        field "0x%04x" int ReplayTV5000_identifier:16;
        // Field 1 data
        if (!ccPick) {
          field output cc_data:16;
        } else {
          field skip:16;
        }
        field skip:16; // 0xaa02
        // Field 2 data
        if (ccPick) {
          field output cc_data:16;
        } else {
          field skip:16;
        }
        return;
      } else if(looking_at_bits(DISH_NETWORK_CLOSED_CAPTION, 16)) {
        field "0x%04x" int DishNetwork_identifier:16;
        // Dish Network does not transmit Field 2
        // (returns without consuming the rest of the section)
        if (ccPick) {
          return;
        }
        int repeater;
        field skip:8; // 0x04
        field skip:16; // 2-byte counter
        field skip:16; // 2-byte checksum?
        field int DN_pattern:8;
        // Unrecognized pattern values match no case and output nothing.
        switch(DN_pattern) {
          case DN_2_BYTE:
            field skip:8; // 0x09
            field output cc_data:16;
            field repeater:8;
            // A trailing REPEAT byte means the same word is emitted twice.
            if (repeater == REPEAT) {
              output cc_data:16;
            }
            break;
          case DN_4_BYTE:
            field skip:8; // 0x09
            field output cc_data:32;
            field skip:32; // 0x020a, followed by 2-byte checksum?
            break;
          case DN_BUFFERED:
            // play the previous buffered caption first
            // NOTE(review): the "> 0" test also rejects any stored value
            // that is negative as a signed int (e.g. a 32-bit caption with
            // its top bit set, if ints are 32 bits wide) - confirm.
            if (DN_buffer > 0) {
              output DN_buffer:32;
            }
            field skip:8; // 0x04
            field skip:8; // 0x09
            field skip:32; // last buffered caption
            field int count:8;
            field skip:8; // 0x09
            // Store the new caption in DN_buffer for the next DN_BUFFERED
            // packet; other count values leave DN_buffer unchanged.
            switch (count) {
              case DN_2_BYTE:
                field cc_data:16;
                // NOTE(review): cc_data is 16 bits wide and DN_buffer is
                // later output as 32 bits; a shift of 8 bits (* 256)
                // followed by "+= cc_data" overlaps the two copies. A
                // 16-bit shift (* 65536) may have been intended - confirm
                // against real Dish Network streams before changing.
                DN_buffer = cc_data * 256;
                field repeater:8;
                if (repeater == REPEAT) {
                  DN_buffer += cc_data;
                }
                break;
              case DN_4_BYTE:
                field cc_data:32;
                DN_buffer = cc_data;
                field skip:8; // 0x02
                break;
            }
            break;
        }
        return;
      } else {
        // This stream seems to be a DSS-SD video stream
        // Read (length, type) records until the next start-code prefix or a
        // zero length.
        // NOTE(review): for record types other than Closed_Caption the
        // payload is not skipped, so the following bytes are read as the
        // next length/type pair - confirm this matches the DSS layout.
        while(!looking_at_bits(0x000001, 24)) {
          field "%d" int user_data_length:8;
          if(user_data_length == 0) return;
          field "====== <%s> (0x%02x)" enum {
            Forbidden = 0x00,
            Presentation_Time_Stamp = 0x02,
            Decode_Time_Stamp = 0x04,
            Chroma_Flags = 0x05,
            Pan_and_Scan = 0x06,
            Field_Display_Flags = 0x07,
            No_Burst = 0x08,
            Closed_Caption = 0x09,
            Extended_Data_Services = 0x0a,
            Reserved = 0x0b..0xfe,
            Escape_to_ext_user_data_type = 0xff
          } user_data_type:8;
          if (user_data_type == Closed_Caption) {
            field cc_data:16;
            // NOTE(review): as written, an I/P picture outputs whatever is
            // in cc_buffer[0] (without clearing it) and discards its own
            // cc_data, while a B picture only stores cc_data. This differs
            // from the ATSC branch (buffer on I/P, output on B) and ccPick
            // is not consulted - confirm the intended behavior.
            if(current_picture_coding_type != 3) {
              // I or P picture
              if(cc_buffer[0] != -1) {
                output cc_buffer[0]:16;
              }
            } else {
              cc_buffer[0] = cc_data;
            }
          }
        }
        return;
      }
  } // end of "switch(start_code)"

  // Unhandled user data (e.g. sequence level): consume bytes up to the next
  // start-code prefix.
  while(!looking_at_bits(0x000001, 24)) {
    field "0x%02x" int byte_data:8;
  }
}

// Parses the start of a picture header (its start code has already been
// consumed by the caller): reads temporal_reference and picture_coding_type,
// records the latter in current_picture_coding_type for user_data(), then
// walks the following start codes. User data is passed to
// user_data(PICTURE_START_CODE). Returns, leaving the start code unconsumed,
// at the next picture, sequence, sequence-end, sequence-error or GOP code.
void picture_header(void) {
  sequence_counter = 0;
  field "%d" int temporal_reference:10;
  // Enumerators are listed in order starting at forbidden, so B_Picture
  // corresponds to the value 3 tested in user_data() (assuming C-style
  // enum numbering from 0 - TODO confirm).
  field "<%s> (%d)" enum {
    forbidden,
    I_Picture,
    P_Picture,
    B_Picture,
    D_Picture_shall_not_be_used
  } picture_coding_type:3;
  current_picture_coding_type = picture_coding_type;

  while(1) {
    int start_code = search_word(START_CODE, 0xff);
    // Codes 0x101..0x1ae (presumably slice start codes) are consumed and
    // ignored; the default case below treats the remaining codes the same way.
    if(start_code >= 0x00000101 && start_code < 0x000001af) {
      {field int remove_start_code:32;}
      continue;
    }
    switch(start_code) {
      case PICTURE_START_CODE:
      case SEQUENCE_START_CODE:
      case SEQUENCE_END_CODE:
      case SEQUENCE_ERROR_START_CODE:
      case GROUP_START_CODE:
        // Belongs to an enclosing level: leave it for the caller.
        return;
      case USER_DATA_START_CODE:
        {field int remove_start_code:32;}
        user_data(PICTURE_START_CODE);
        continue;
      default:
        {field int remove_start_code:32;}
        continue;
    }
  }
}