12 #include "../stdafx.h"
13 #include "../core/endian_func.hpp"
14 #include "../string_func.h"
15 #include "../table/control_codes.h"
20 #include "../table/strgen_tables.h"
22 #include "../safeguards.h"
/* File name string, initialised to a placeholder value.
 * NOTE(review): presumably the name of the file currently being parsed, for use in diagnostics -- confirm against the reporting helpers. */
28 const char *
_file =
"(unknown file)";
/* _errors and _warnings are both reset to 0 elsewhere in this file.
 * _show_todo is tested as a bit mask elsewhere in this file: value 2 gates the "is untranslated" warnings,
 * value 1 gates the code path that sets up the "<TODO> " string. */
30 int _errors, _warnings, _show_todo;
/* Forward declaration; the definition appears further down in this file.
 * Callers in this file pass the address of their scan pointer as `str` and stop iterating when NULL is returned. */
34 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei);
43 caseidx(caseidx), string(
stredup(string)), next(next)
63 hash_next(0), index(index), line(line), translated_case(NULL)
122 for (; *s !=
'\0'; s++) hash =
ROL(hash, 3) ^ *s;
152 if (strcmp(ls->
name, s) == 0)
return ls;
166 for (; *s !=
'\0'; s++) {
167 hash =
ROL(hash, 3) ^ *s;
168 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
192 hash ^= i * 0x717239;
193 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
197 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
200 hash ^= (cs - _cmd_structs) * 0x1234567;
201 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
220 static const char *_cur_ident;
235 static int _cur_argidx;
256 }
else if (value < 0x800) {
257 *this->
Append() = 0xC0 +
GB(value, 6, 5);
258 *this->
Append() = 0x80 +
GB(value, 0, 6);
259 }
else if (value < 0x10000) {
260 *this->
Append() = 0xE0 +
GB(value, 12, 4);
261 *this->
Append() = 0x80 +
GB(value, 6, 6);
262 *this->
Append() = 0x80 +
GB(value, 0, 6);
263 }
else if (value < 0x110000) {
264 *this->
Append() = 0xF0 +
GB(value, 18, 3);
265 *this->
Append() = 0x80 +
GB(value, 12, 6);
266 *this->
Append() = 0x80 +
GB(value, 6, 6);
267 *this->
Append() = 0x80 +
GB(value, 0, 6);
269 strgen_warning(
"Invalid unicode value U+0x%X", value);
/**
 * Check the UTF-8 sequence starting at \a s.
 * The visible branches handle 2-, 3- and 4-byte sequences and return the sequence length in bytes.
 * The caller in this file treats a return value of 0 as "invalid sequence".
 * NOTE(review): the single-byte branch and the failure return are not part of this excerpt.
 * NOTE(review): comments below assume GB(x, s, n) yields n bits of x starting at bit s -- confirm.
 */
274 size_t Utf8Validate(
const char *s)
281 }
/* 2-byte form: lead-byte pattern check plus one continuation byte. */
else if (
GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
283 c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
/* Only accepted when the decoded value is at least 0x80 (smaller values are not reported as length 2). */
284 if (c >= 0x80)
return 2;
285 }
/* 3-byte form: lead-byte pattern check plus two continuation bytes. */
else if (
GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
287 c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
/* Only accepted when the decoded value is at least 0x800. */
288 if (c >= 0x800)
return 3;
289 }
/* 4-byte form: lead-byte pattern check plus three continuation bytes. */
else if (
GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
291 c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
/* Only accepted when the decoded value lies in 0x10000..0x10FFFF inclusive. */
292 if (c >= 0x10000 && c <= 0x10FFFF)
return 4;
/**
 * Emit handler taking the output buffer, the command's parameter text and a value.
 * The visible part only warns when \a buf is non-empty, i.e. when text follows the command name.
 * NOTE(review): the actual emission of \a value is not part of this excerpt.
 */
299 void EmitSingleChar(
Buffer *buffer,
char *buf,
int value)
301 if (*buf !=
'\0') strgen_warning(
"Ignoring trailing letters in command");
/**
 * Parse a number, optionally followed by ':' and a second number, from the text at *buf.
 * Returns false when no digits could be consumed for either number.
 * NOTE(review): how \a value and *buf are updated on success is not part of this excerpt.
 */
312 bool ParseRelNum(
char **buf,
int *value,
int *offset)
314 const char *s = *buf;
/* Skip leading blanks and tabs. */
318 while (*s ==
' ' || *s ==
'\t') s++;
/* Base 0: strtol auto-detects decimal, octal ("0" prefix) and hexadecimal ("0x" prefix). */
323 int v = strtol(s, &end, 0);
/* strtol consumed nothing, so there is no number here. */
324 if (end == s)
return false;
/* The second number is only looked for when the caller supplied somewhere to store it. */
330 if (offset != NULL && *end ==
':') {
333 *offset = strtol(s, &end, 0);
334 if (end == s)
return false;
/**
 * Extract the next word from the text at *buf.
 * Leading blanks and tabs are skipped; NULL is returned when only the end of the string remains.
 * NOTE(review): the two scanning loops that follow (each stopping at the terminator, one of them also at blank/tab)
 * are only partially visible in this excerpt, so quoting rules and the *buf update cannot be documented from here.
 */
341 char *ParseWord(
char **buf)
345 while (*s ==
' ' || *s ==
'\t') s++;
346 if (*s ==
'\0')
return NULL;
352 if (*s ==
'\0')
break;
363 if (*s ==
'\0')
break;
364 if (*s ==
' ' || *s ==
'\t') {
/* Forward declaration; the definition appears further down in this file.
 * The default of 0 for `offset` is what allows the one-argument call made by PutArgidxCommand. */
376 static int TranslateArgumentIdx(
int arg,
int offset = 0);
/**
 * Write a list of \a nw words to \a buffer.
 * Layout visible here: first one byte per word holding strlen(word) + 1, then the characters of each word in order.
 * NOTE(review): the "+ 1" suggests a terminator byte is written after each word; that line is not part of this excerpt.
 * NOTE(review): anything emitted before the length bytes (e.g. a word count) is not visible here either.
 */
378 static void EmitWordList(
Buffer *buffer,
const char *
const *words, uint nw)
/* Pass 1: the length table. */
381 for (uint i = 0; i < nw; i++) buffer->
AppendByte((uint)strlen(words[i]) + 1);
/* Pass 2: the word contents. */
382 for (uint i = 0; i < nw; i++) {
383 for (uint j = 0; words[i][j] !=
'\0'; j++) buffer->
AppendByte(words[i][j]);
/**
 * Emit a plural-form selection: an argument reference followed by the list of plural words parsed from \a buf.
 * Fatal errors are raised when no words are given or, on one path, when the word count differs from the expected count.
 */
388 void EmitPlural(
Buffer *buffer,
char *buf,
int value)
390 int argidx = _cur_argidx;
/* Without an explicit argument number, fall back to the argument before the current index. */
397 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
/* Collect words until ParseWord reports there are none left. */
401 words[nw] = ParseWord(&buf);
402 if (words[nw] == NULL)
break;
406 strgen_fatal(
"%s: No plural words", _cur_ident);
409 if (expected != nw) {
411 strgen_fatal(
"%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
/* Alternative path for a wrong count: optionally warn (bit value 2 of _show_todo), then pad.
 * NOTE(review): the condition selecting between the fatal error and this path is not part of this excerpt. */
414 if ((_show_todo & 2) != 0) strgen_warning(
"'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
/* Pad the list up to the expected count by repeating the last word. */
418 for (; nw < expected; nw++) {
419 words[nw] = words[nw - 1];
/* Output: translated argument index byte, then the word list. */
427 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
428 EmitWordList(buffer, words, nw);
/**
 * Emit handler for the gender command.
 * Visible here: a fatal error "G argument ... invalid" when an index reaches MAX_NUM_GENDERS, and a path that
 * emits a gender-dependent word list for an argument whose command carries the C_GENDER flag.
 * NOTE(review): the condition that selects between these paths is not part of this excerpt.
 */
432 void EmitGender(
Buffer *buffer,
char *buf,
int value)
434 int argidx = _cur_argidx;
443 if (nw >=
MAX_NUM_GENDERS) strgen_fatal(
"G argument '%s' invalid", buf);
/* A failed parse is deliberately ignored here: argidx then stays at the current argument index
 * (unlike EmitPlural, which steps back by one). */
453 if (!ParseRelNum(&buf, &argidx, &offset)) {}
/* NOTE(review): no bounds check on argidx is visible before this array access -- confirm one exists or is not needed. */
455 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
456 if (cmd == NULL || (cmd->flags &
C_GENDER) == 0) {
457 strgen_fatal(
"Command '%s' can't have a gender", cmd == NULL ?
"<empty>" : cmd->cmd);
/* Collect words until ParseWord reports there are none left. */
461 words[nw] = ParseWord(&buf);
462 if (words[nw] == NULL)
break;
/* Exactly one word per gender defined for the language is required. */
464 if (nw != _lang.
num_genders) strgen_fatal(
"Bad # of arguments for gender command");
466 assert(
IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
/* Output: translated argument index byte, then the word list. */
468 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
469 EmitWordList(buffer, words, nw);
/**
 * Look up a command by name in _cmd_structs using a linear scan.
 * \a s does not need to be NUL-terminated; only its first \a len characters are compared.
 * Returns the first entry whose name is exactly those \a len characters.
 * NOTE(review): the return value for "not found" is not part of this excerpt.
 */
473 static const CmdStruct *FindCmd(
const char *s,
int len)
475 for (
const CmdStruct *cs = _cmd_structs; cs !=
endof(_cmd_structs); cs++) {
/* Prefix equality plus a terminator at position len means the names have equal length and content. */
476 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] ==
'\0')
return cs;
/**
 * Resolve the case name given by the first \a len characters of \a str to a case index.
 * Raises a fatal error when the resulting index is MAX_NUM_CASES or larger.
 * NOTE(review): the lookup that produces case_idx and the value returned are not part of this excerpt.
 */
481 static uint ResolveCaseName(
const char *str,
size_t len)
/* Make a NUL-terminated copy of the name.
 * NOTE(review): the size of case_str and any check of len against it are not visible here -- confirm. */
486 memcpy(case_str, str, len);
487 case_str[len] =
'\0';
490 if (case_idx >=
MAX_NUM_CASES) strgen_fatal(
"Invalid case-name '%s'", case_str);
/**
 * Find and parse the next "{...}" command in the string at *str.
 * Returns NULL when the end of the string is reached before any '{'.
 * Visible steps: optional "<number>:" argument index, command name lookup via FindCmd, optional case name
 * resolved through ResolveCaseName, and error reports for undefined commands and a missing '}'.
 * NOTE(review): how \a param is filled and how *str is advanced is not part of this excerpt.
 */
497 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei)
499 const char *s = *str, *start;
/* Skip ahead to the opening brace; no brace means no further commands. */
506 for (; *s !=
'{'; s++) {
507 if (*s ==
'\0')
return NULL;
/* A leading digit introduces an explicit argument number, which must be followed by ':'. */
511 if (*s >=
'0' && *s <=
'9') {
514 *argno = strtoul(s, &end, 0);
515 if (*end !=
':') strgen_fatal(
"missing arg #");
/* The command name runs until '}', blank, '=', '.' or the end of the string. */
523 }
while (c !=
'}' && c !=
' ' && c !=
'=' && c !=
'.' && c != 0);
/* NOTE(review): the "- 1" presumably excludes the terminating character already consumed by the loop -- confirm. */
525 const CmdStruct *cmd = FindCmd(start, s - start - 1);
527 strgen_error(
"Undefined command '%.*s'", (
int)(s - start - 1), start);
532 const char *casep = s;
/* A case suffix is only legal on commands flagged with C_CASE. */
534 if (!(cmd->flags &
C_CASE)) {
535 strgen_fatal(
"Command '%s' can't have a case", cmd->cmd);
/* The case name runs until '}', blank or the end of the string. */
540 }
while (c !=
'}' && c !=
' ' && c !=
'\0');
541 *casei = ResolveCaseName(casep, s - casep - 1);
545 strgen_error(
"Missing } from command '%s'", start);
558 strgen_error(
"Missing } from command '%s'", start);
580 data(data), file(
stredup(file)), master(master), translation(translation)
597 memset(p, 0,
sizeof(*p));
601 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
603 if (ar == NULL)
break;
606 if (argno != -1 && ar->consumes == 0) strgen_fatal(
"Non consumer param can't have a paramindex");
609 if (argno != -1) argidx = argno;
610 if (argidx < 0 || (uint)argidx >=
lengthof(p->cmd)) strgen_fatal(
"invalid param idx %d", argidx);
611 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal(
"duplicate param idx %d", argidx);
613 p->cmd[argidx++] = ar;
615 if (p->np >=
lengthof(p->pairs)) strgen_fatal(
"too many commands in string, max " PRINTF_SIZE,
lengthof(p->pairs));
616 p->pairs[p->np].a = ar;
617 p->pairs[p->np].v = param[0] !=
'\0' ?
stredup(param) :
"";
626 if (a == NULL)
return NULL;
628 if (strcmp(a->cmd,
"STRING1") == 0 ||
629 strcmp(a->cmd,
"STRING2") == 0 ||
630 strcmp(a->cmd,
"STRING3") == 0 ||
631 strcmp(a->cmd,
"STRING4") == 0 ||
632 strcmp(a->cmd,
"STRING5") == 0 ||
633 strcmp(a->cmd,
"STRING6") == 0 ||
634 strcmp(a->cmd,
"STRING7") == 0 ||
635 strcmp(a->cmd,
"RAW_STRING") == 0) {
636 return FindCmd(
"STRING", 6);
/**
 * Compare the commands used in a language string (\a a) with those in its template string (\a b) and
 * warn about every mismatch; \a name is used to identify the string in the warnings.
 * NOTE(review): the return statements are not part of this excerpt; the caller in this file abandons the
 * string when this returns false.
 */
643 static bool CheckCommandsMatch(
char *a,
char *b,
const char *name)
/* b is parsed as the template, a as the translation. */
655 ExtractCommandString(&templ, b,
true);
656 ExtractCommandString(&lang, a,
true);
659 if (templ.np != lang.np) {
660 strgen_warning(
"%s: template string and language string have a different # of commands", name);
/* Each template (command, parameter) pair needs a counterpart somewhere in the translation. */
664 for (uint i = 0; i < templ.np; i++) {
667 for (uint j = 0; j < lang.np; j++) {
668 if (templ.pairs[i].a == lang.pairs[j].a &&
669 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
/* Clear the matched entry so it cannot satisfy a second template pair. */
671 lang.pairs[j].a = NULL;
678 strgen_warning(
"%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
/* Positional check: the template command is passed through TranslateCmdForCompare before comparing it
 * with the translation's command at the same parameter index. */
685 for (uint i = 0; i <
lengthof(templ.cmd); i++) {
686 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
687 strgen_warning(
"%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
688 lang.cmd[i] == NULL ?
"<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
689 templ.cmd[i] == NULL ?
"<empty>" : templ.cmd[i]->cmd);
/**
 * Process one input line \a str: dispatch pragmas, skip ignorable lines, split "name:text" at the first ':',
 * validate the text as UTF-8, split an optional ".case" suffix off the name and report conflicts.
 * NOTE(review): several branches (master vs. translation handling, storing the string) are only visible through
 * their error messages in this excerpt.
 */
697 void StringReader::HandleString(
char *str)
/* Pragma lines: second character '#', third character anything but '#'; the text after the two leading
 * characters goes to HandlePragma. NOTE(review): the check of str[0] is not part of this excerpt. */
700 if (str[1] ==
'#' && str[2] !=
'#') this->
HandlePragma(str + 2);
/* Lines starting with ';' or a blank, and empty lines, are ignored. */
705 if (*str ==
';' || *str ==
' ' || *str ==
'\0')
return;
707 char *s = strchr(str,
':');
709 strgen_error(
"Line has no ':' delimiter");
/* Walk back from the colon over trailing blanks/tabs of the name part. */
716 for (t = s; t > str && (t[-1] ==
' ' || t[-1] ==
'\t'); t--) {}
/* Validate the text part sequence by sequence; a length of 0 marks an invalid sequence. */
722 for (tmp = s; *tmp !=
'\0';) {
723 size_t len = Utf8Validate(tmp);
724 if (len == 0) strgen_fatal(
"Invalid UTF-8 sequence in '%s'", s);
/* Rejected code point ranges visible here: U+E000..U+F8FF and U+FFF0..U+FFFF.
 * NOTE(review): the start of this condition is not part of this excerpt, so further ranges may be rejected too. */
730 (c >= 0xE000 && c <= 0xF8FF) ||
731 (c >= 0xFFF0 && c <= 0xFFFF)) {
732 strgen_fatal(
"Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
/* Split an optional ".case" suffix off the string name by overwriting the '.' with a terminator. */
740 char *casep = strchr(str,
'.');
741 if (casep != NULL) *casep++ =
'\0';
748 strgen_error(
"Cases in the base translation are not supported.");
753 strgen_error(
"String name '%s' is used multiple times", str);
/* The slot for the next string ID must still be free. */
757 if (this->
data.
strings[this->data.next_string_id] != NULL) {
758 strgen_error(
"String ID 0x%X for '%s' already in use by '%s'", this->
data.
next_string_id, str, this->data.strings[this->data.next_string_id]->name);
766 strgen_warning(
"String name '%s' does not exist in master file", str);
771 strgen_error(
"String name '%s' is used multiple times", str);
/* Drop the translation when its commands do not match the English original. */
776 if (!CheckCommandsMatch(s, ent->
english, str))
return;
792 if (!memcmp(str,
"plural ", 7)) {
795 strgen_fatal(
"Invalid pluralform %d", _lang.
plural_form);
798 strgen_fatal(
"unknown pragma '%s'", str);
/**
 * Strip trailing carriage returns, line feeds and blanks from \a buf.
 * The visible loop finds the length of \a buf without those trailing characters.
 * NOTE(review): the line that writes the new terminator is not part of this excerpt.
 */
802 static void rstrip(
char *buf)
804 size_t i = strlen(buf);
/* i ends up as the index just past the last character to keep. */
805 while (i > 0 && (buf[i - 1] ==
'\r' || buf[i - 1] ==
'\n' || buf[i - 1] ==
' ')) i--;
812 _warnings = _errors = 0;
826 this->HandleString(buf);
/**
 * Translate a command argument index (plus an offset within that command's arguments) using the commands
 * recorded in _cur_pcs.
 * Fatal errors: index outside _cur_pcs.cmd, offset not smaller than the command's `consumes`, or no command
 * recorded at the index.
 * NOTE(review): the final return expression (presumably sum plus offset) is not part of this excerpt.
 */
848 static int TranslateArgumentIdx(
int argidx,
int offset)
852 if (argidx < 0 || (uint)argidx >=
lengthof(_cur_pcs.cmd)) {
853 strgen_fatal(
"invalid argidx %d", argidx);
855 const CmdStruct *cs = _cur_pcs.cmd[argidx];
/* The offset must address one of the parameters this command consumes. */
856 if (cs != NULL && cs->consumes <= offset) {
857 strgen_fatal(
"invalid argidx offset %d:%d", argidx, offset);
860 if (_cur_pcs.cmd[argidx] == NULL) {
861 strgen_fatal(
"no command for this argidx %d", argidx);
/* Accumulate a per-index count over all preceding indices; a NULL entry counts as 1.
 * NOTE(review): the assignment of cs inside the loop is not part of this excerpt. */
864 for (
int i = sum = 0; i < argidx; i++) {
867 sum += (cs != NULL) ? cs->consumes : 1;
/**
 * Append the translated form of the current argument index (_cur_argidx) to \a buffer as one byte.
 * NOTE(review): anything written before this byte (e.g. a control code) is not part of this excerpt.
 */
873 static void PutArgidxCommand(
Buffer *buffer)
876 buffer->
AppendByte(TranslateArgumentIdx(_cur_argidx));
/**
 * Write \a str to \a buffer, translating each embedded "{...}" command through its emit procedure.
 * NOTE(review): the copying of literal text between commands and the case handling are not part of this excerpt.
 */
880 static void PutCommandString(
Buffer *buffer,
const char *str)
884 while (*str !=
'\0') {
/* NULL means there is no further command in the string. */
894 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
895 if (cs == NULL)
break;
/* Only commands that consume parameters take part in argument index tracking. */
903 if (cs->consumes > 0) {
/* An explicit argument number that differs from the running index needs an index-switch command in the output. */
905 if (argno != -1 && argno != _cur_argidx) {
907 PutArgidxCommand(buffer);
/* From here on the command recorded for this argument position is used, and the running index advances. */
911 cs = _cur_pcs.cmd[_cur_argidx++];
913 strgen_fatal(
"%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
/* Delegate the actual output to the command's emit procedure. */
917 cs->proc(buffer, param, cs->value);
929 if (length >= 0x4000) {
930 strgen_fatal(
"string too long");
933 if (length >= 0xC0) {
934 buffer[offs++] = (length >> 8) | 0xC0;
936 buffer[offs++] = length & 0xFF;
937 this->
Write((byte*)buffer, offs);
947 for (
size_t tab = 0; tab < data.
tabs; tab++) {
951 _lang.
offsets[tab] = TO_LE16(n);
953 for (uint j = 0; j != in_use[tab]; j++) {
967 for (
size_t tab = 0; tab < data.
tabs; tab++) {
968 for (uint j = 0; j != in_use[tab]; j++) {
979 _cur_ident = ls->
name;
983 if (_show_todo > 0 && ls->
translated == NULL) {
984 if ((_show_todo & 2) != 0) {
985 strgen_warning(
"'%s' is untranslated", ls->
name);
987 if ((_show_todo & 1) != 0) {
988 const char *s =
"<TODO> ";
994 ExtractCommandString(&_cur_pcs, ls->
english,
false);
1006 if (casep != NULL) {
1016 for (num = 0, c = casep; c; c = c->
next) num++;
1020 for (c = casep; c != NULL; c = c->
next) {
1023 uint pos = buffer.
Length();
1027 PutCommandString(&buffer, c->
string);
1030 uint size = buffer.
Length() - (pos + 2);
1031 buffer[pos + 0] =
GB(size, 8, 8);
1032 buffer[pos + 1] =
GB(size, 0, 8);
1036 if (cmdp != NULL) PutCommandString(&buffer, cmdp);