diff options
Diffstat (limited to 'src/srt.c')
-rw-r--r-- | src/srt.c | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/src/srt.c b/src/srt.c new file mode 100644 index 0000000..523eb5a --- /dev/null +++ b/src/srt.c @@ -0,0 +1,145 @@ +/* + * This file is part of WordExtract. + * + * Copyright (C) 2009 Borisov Alexandr + * + * WordExtract is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * WordExtract is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with WordExtract. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <ctype.h> +#include "main.h" +#include "srt.h" +#include "engparser.h" + +static int get_srt_tag_length(char *); + +/* + * Reads text lines from *.srt file + * NOTE that "\r\n" and "\n" line ends are possible + */ +int process_srt(FILE *subtitle) +{ + char line[MAXLINE] = {0}; + int lines; + + while (!feof(subtitle)) { + for (lines = 0; lines < 2; lines++) { + fgets(line, MAXLINE, subtitle); + if (feof(subtitle)) return 0; + } + fgets(line, MAXLINE, subtitle); + process_srt_line(line); + fgets(line, MAXLINE, subtitle); + if (!((line[0] == '\n')||((line[0] == '\r')&&(line[1] == '\n')))) { + process_srt_line(line); + fgets(line, MAXLINE, subtitle); + } + } + return 0; +} + +/* + * Recieves: line from *.srt file with final '\n' or "\r\n" + */ +int process_srt_line(char *line) +{ + char phrase[MAXPHRASE] = {0}; + int i; + int taglength; + + for (i = 0; !((line[0] == '\n')||((line[0] == '\r')&&(line[1] == '\n'))); i++, line++) { + while (*line == '<') { + if (!(taglength = get_srt_tag_length(line))) + break; + line += taglength; + } + if ((line[0] == '\n')||((line[0] == '\r')&&(line[1] == '\n'))) + break; + phrase[i] = *line; + } + phrase[i] = '\n'; + switch (lang) { + case ENG: parseengphrase(phrase); + } + return 0; +} + +/* + * It processes tags and calculate it's length. Possible tags are: + * <font color="#00FF00" size="6">The <font size="35">most</font> difficult tag</font> + * <b><i><u>Some of formatted text</u></i></b> + * It also misses tags with mistakes + */ +static int get_srt_tag_length(char *line) +{ + char c; + int taglength; + + if (line[1] == '/') { + c = line[2]; + switch (c) { + case 'b': case 'i': case 'u': + taglength = 4; + break; + case 'f': + taglength = 7; + break; + default: + taglength = 0; + break; + } + } + else { + c = line[1]; + switch (c) { + case 'b': case 'i': case 'u': + taglength = 3; + break; + case 'f': + taglength = 5; + c = line[5]; + while (c == ' ') { + taglength++; + c = line[taglength]; + switch (c) { + case 'c': + taglength += 15; + break; + case 's': + taglength += 6; + while isdigit(line[taglength]) + taglength++; + taglength++; + break; + case '>': + break; + default: + taglength = 0; + } + c = line[taglength]; + } + if (c == '>') + taglength++; + else + taglength = 0; + break; + default: + taglength = 0; + break; + } + } + return taglength; +} + |