OpenTTD
strgen_base.cpp
Go to the documentation of this file.
1 /* $Id: strgen_base.cpp 26511 2014-04-25 17:43:09Z rubidium $ */
2 
3 /*
4  * This file is part of OpenTTD.
5  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8  */
9 
12 #include "../stdafx.h"
13 #include "../core/endian_func.hpp"
14 #include "../string_func.h"
15 #include "../table/control_codes.h"
16 
17 #include "strgen.h"
18 
19 
20 #include "../table/strgen_tables.h"
21 
22 #include "../safeguards.h"
23 
24 /* Compiles a list of strings into a compiled string list */
25 
/* Whether the text currently being emitted is a translation rather than the English string;
 * set while writing a language file and read by EmitPlural. */
26 static bool _translated;
/* Whether the command checks between template and language strings are active;
 * set at the start of each parsed file and read by CheckCommandsMatch. */
27 static bool _translation;
/* Name of the file that is currently being parsed. */
28 const char *_file = "(unknown file)";
/* Line number that belongs to the string currently being processed. */
29 int _cur_line;
/* Error and warning counters (reset at the start of each parsed file) and the "show todo" bitmask:
 * bit 0 prefixes untranslated strings with "<TODO> ", bit 1 warns about untranslated strings. */
30 int _errors, _warnings, _show_todo;
32 
/* Size of the buffers that receive the parameter text of a single command. */
33 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100;
/* Forward declaration; the definition follows further down in this file. */
34 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
35 
/*
 * Create a new case entry.
 * caseidx is stored as given, string is duplicated with stredup (the entry keeps its own copy)
 * and next is stored as the link to the following entry of the list.
 */
42 Case::Case(int caseidx, const char *string, Case *next) :
43  caseidx(caseidx), string(stredup(string)), next(next)
44 {
45 }
46 
49 {
50  free(this->string);
51  delete this->next;
52 }
53 
/*
 * Create a new string entry.
 * name and english are duplicated with stredup; index and line are stored as given.
 * The entry starts without a translation, without translated cases and with hash_next set to 0.
 */
61 LangString::LangString(const char *name, const char *english, int index, int line) :
62  name(stredup(name)), english(stredup(english)), translated(NULL),
63  hash_next(0), index(index), line(line), translated_case(NULL)
64 {
65 }
66 
69 {
70  free(this->name);
71  free(this->english);
72  free(this->translated);
73  delete this->translated_case;
74 }
75 
78 {
79  free(this->translated);
80  this->translated = NULL;
81 
82  delete this->translated_case;
83  this->translated_case = NULL;
84 }
85 
90 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
91 {
92  this->strings = CallocT<LangString *>(max_strings);
93  this->hash_heads = CallocT<uint16>(max_strings);
94  this->next_string_id = 0;
95 }
96 
99 {
100  for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
101  free(this->strings);
102  free(this->hash_heads);
103 }
104 
107 {
108  for (size_t i = 0; i < this->max_strings; i++) {
109  LangString *ls = this->strings[i];
110  if (ls != NULL) ls->FreeTranslation();
111  }
112 }
113 
119 uint StringData::HashStr(const char *s) const
120 {
121  uint hash = 0;
122  for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
123  return hash % this->max_strings;
124 }
125 
131 void StringData::Add(const char *s, LangString *ls)
132 {
133  uint hash = this->HashStr(s);
134  ls->hash_next = this->hash_heads[hash];
135  /* Off-by-one for hash find. */
136  this->hash_heads[hash] = ls->index + 1;
137  this->strings[ls->index] = ls;
138 }
139 
146 {
147  int idx = this->hash_heads[this->HashStr(s)];
148 
149  while (--idx >= 0) {
150  LangString *ls = this->strings[idx];
151 
152  if (strcmp(ls->name, s) == 0) return ls;
153  idx = ls->hash_next;
154  }
155  return NULL;
156 }
157 
164 uint StringData::VersionHashStr(uint hash, const char *s) const
165 {
166  for (; *s != '\0'; s++) {
167  hash = ROL(hash, 3) ^ *s;
168  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
169  }
170  return hash;
171 }
172 
178 {
179  uint hash = 0;
180 
181  for (size_t i = 0; i < this->max_strings; i++) {
182  const LangString *ls = this->strings[i];
183 
184  if (ls != NULL) {
185  const CmdStruct *cs;
186  const char *s;
187  char buf[MAX_COMMAND_PARAM_SIZE];
188  int argno;
189  int casei;
190 
191  s = ls->name;
192  hash ^= i * 0x717239;
193  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
194  hash = this->VersionHashStr(hash, s + 1);
195 
196  s = ls->english;
197  while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
198  if (cs->flags & C_DONTCOUNT) continue;
199 
200  hash ^= (cs - _cmd_structs) * 0x1234567;
201  hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
202  }
203  }
204  }
205 
206  return hash;
207 }
208 
213 uint StringData::CountInUse(uint tab) const
214 {
215  int i;
216  for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != NULL) break;
217  return i + 1;
218 }
219 
/* Name of the string that is currently being written; used in diagnostics. */
220 static const char *_cur_ident;
221 
/* A non-consuming command found in a string together with its parameter text. */
222 struct CmdPair {
/* The command. */
223  const CmdStruct *a;
/* The parameter text of the command; an empty string when there is none. */
224  const char *v;
225 };
226 
228  uint np;
229  CmdPair pairs[32];
230  const CmdStruct *cmd[32]; // ordered by param #
231 };
232 
233 /* Used when generating some advanced commands. */
/* Commands parsed from the English version of the string that is being written. */
234 static ParsedCommandStruct _cur_pcs;
/* Index of the next argument to consume while emitting a string. */
235 static int _cur_argidx;
236 
238 struct Buffer : SmallVector<byte, 256> {
243  void AppendByte(byte value)
244  {
245  *this->Append() = value;
246  }
247 
252  void AppendUtf8(uint32 value)
253  {
254  if (value < 0x80) {
255  *this->Append() = value;
256  } else if (value < 0x800) {
257  *this->Append() = 0xC0 + GB(value, 6, 5);
258  *this->Append() = 0x80 + GB(value, 0, 6);
259  } else if (value < 0x10000) {
260  *this->Append() = 0xE0 + GB(value, 12, 4);
261  *this->Append() = 0x80 + GB(value, 6, 6);
262  *this->Append() = 0x80 + GB(value, 0, 6);
263  } else if (value < 0x110000) {
264  *this->Append() = 0xF0 + GB(value, 18, 3);
265  *this->Append() = 0x80 + GB(value, 12, 6);
266  *this->Append() = 0x80 + GB(value, 6, 6);
267  *this->Append() = 0x80 + GB(value, 0, 6);
268  } else {
269  strgen_warning("Invalid unicode value U+0x%X", value);
270  }
271  }
272 };
273 
274 size_t Utf8Validate(const char *s)
275 {
276  uint32 c;
277 
278  if (!HasBit(s[0], 7)) {
279  /* 1 byte */
280  return 1;
281  } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
282  /* 2 bytes */
283  c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
284  if (c >= 0x80) return 2;
285  } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
286  /* 3 bytes */
287  c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
288  if (c >= 0x800) return 3;
289  } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
290  /* 4 bytes */
291  c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
292  if (c >= 0x10000 && c <= 0x10FFFF) return 4;
293  }
294 
295  return 0;
296 }
297 
298 
299 void EmitSingleChar(Buffer *buffer, char *buf, int value)
300 {
301  if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
302  buffer->AppendUtf8(value);
303 }
304 
305 
306 /* The plural specifier looks like
307  * {NUM} {PLURAL -1 passenger passengers}; it selects either "passenger" or "passengers" depending on the count in NUM. */
308 
309 /* This is encoded as
310  * CommandByte <PLURAL FORM> <ARG#> <NUM> {Length of each string} {each string} */
311 
/**
 * Parse an optionally relative number, optionally followed by ":<offset>".
 * A number with a leading '+' or a negative number is added to *value;
 * any other number replaces *value.
 * @param buf    Pointer to the text to parse; advanced past the parsed part on success only.
 * @param value  The value to update.
 * @param offset When not NULL and the number is followed by ':', receives the number after the ':'.
 * @return True when a number (and, if present, a valid offset) was parsed.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;

	const bool relative = (*cursor == '+');
	if (relative) cursor++;

	char *stop;
	const int number = strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	if (relative || number < 0) {
		*value += number;
	} else {
		*value = number;
	}

	if (offset != NULL && *stop == ':') {
		/* Take the Nth within */
		const char *sub = stop + 1;
		*offset = strtol(sub, &stop, 0);
		if (stop == sub) return false;
	}

	*buf = stop;
	return true;
}
339 
/**
 * Parse out the next word of a parameter text.
 * A word is either delimited by spaces/tabs or enclosed in double quotes.
 * The delimiter that ends the word is overwritten with a NUL in the buffer.
 * @param buf Pointer to the text; advanced past the word when one was found.
 * @return The start of the word, or NULL when only whitespace is left.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return NULL;

	/* A leading quote switches the terminator from whitespace to the closing quote. */
	const bool quoted = (*cursor == '"');
	if (quoted) cursor++;

	char *word = cursor;
	while (*cursor != '\0') {
		const bool at_end = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (at_end) {
			*cursor++ = '\0';
			break;
		}
		cursor++;
	}

	*buf = cursor;
	return word;
}
374 
375 /* Forward declaration */
/* The offset is added to the resulting parameter position and defaults to 0. */
376 static int TranslateArgumentIdx(int arg, int offset = 0);
377 
378 static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
379 {
380  buffer->AppendByte(nw);
381  for (uint i = 0; i < nw; i++) buffer->AppendByte((uint)strlen(words[i]) + 1);
382  for (uint i = 0; i < nw; i++) {
383  for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
384  buffer->AppendByte(0);
385  }
386 }
387 
388 void EmitPlural(Buffer *buffer, char *buf, int value)
389 {
390  int argidx = _cur_argidx;
391  int offset = 0;
392  int expected = _plural_forms[_lang.plural_form].plural_count;
393  const char **words = AllocaM(const char *, max(expected, MAX_PLURALS));
394  int nw = 0;
395 
396  /* Parse out the number, if one exists. Otherwise default to prev arg. */
397  if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
398 
399  /* Parse each string */
400  for (nw = 0; nw < MAX_PLURALS; nw++) {
401  words[nw] = ParseWord(&buf);
402  if (words[nw] == NULL) break;
403  }
404 
405  if (nw == 0) {
406  strgen_fatal("%s: No plural words", _cur_ident);
407  }
408 
409  if (expected != nw) {
410  if (_translated) {
411  strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
412  expected, nw);
413  } else {
414  if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
415  if (nw > expected) {
416  nw = expected;
417  } else {
418  for (; nw < expected; nw++) {
419  words[nw] = words[nw - 1];
420  }
421  }
422  }
423  }
424 
425  buffer->AppendUtf8(SCC_PLURAL_LIST);
426  buffer->AppendByte(_lang.plural_form);
427  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
428  EmitWordList(buffer, words, nw);
429 }
430 
431 
432 void EmitGender(Buffer *buffer, char *buf, int value)
433 {
434  int argidx = _cur_argidx;
435  int offset = 0;
436  uint nw;
437 
438  if (buf[0] == '=') {
439  buf++;
440 
441  /* This is a {G=DER} command */
442  nw = _lang.GetGenderIndex(buf);
443  if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
444 
445  /* now nw contains the gender index */
446  buffer->AppendUtf8(SCC_GENDER_INDEX);
447  buffer->AppendByte(nw);
448  } else {
449  const char *words[MAX_NUM_GENDERS];
450 
451  /* This is a {G 0 foo bar two} command.
452  * If no relative number exists, default to +0 */
453  if (!ParseRelNum(&buf, &argidx, &offset)) {}
454 
455  const CmdStruct *cmd = _cur_pcs.cmd[argidx];
456  if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
457  strgen_fatal("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
458  }
459 
460  for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
461  words[nw] = ParseWord(&buf);
462  if (words[nw] == NULL) break;
463  }
464  if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
465 
466  assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
467  buffer->AppendUtf8(SCC_GENDER_LIST);
468  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
469  EmitWordList(buffer, words, nw);
470  }
471 }
472 
473 static const CmdStruct *FindCmd(const char *s, int len)
474 {
475  for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
476  if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
477  }
478  return NULL;
479 }
480 
481 static uint ResolveCaseName(const char *str, size_t len)
482 {
483  /* First get a clean copy of only the case name, then resolve it. */
484  char case_str[CASE_GENDER_LEN];
485  len = min(lengthof(case_str) - 1, len);
486  memcpy(case_str, str, len);
487  case_str[len] = '\0';
488 
489  uint8 case_idx = _lang.GetCaseIndex(case_str);
490  if (case_idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", case_str);
491  return case_idx + 1;
492 }
493 
494 
495 /* returns NULL on eof
496  * else returns command struct */
/*
 * Parse the next "{...}" command of a string.
 *
 * str:   in/out position in the string; moved past the closing '}' only when a command is returned.
 * param: receives the NUL-terminated parameter text of the command. When the command name is
 *        followed by '=', the '=' is part of the parameter text; a separating space is not.
 *        Callers pass a buffer of MAX_COMMAND_PARAM_SIZE bytes.
 * argno: receives the explicit argument number ("{2:CMD}"), or -1 when none is given.
 * casei: receives the value of ResolveCaseName for a ".case" suffix, or -1 when none is given.
 *
 * Returns the command, or NULL when the string holds no further command, when the
 * command is unknown, or when the closing '}' is missing (the latter two report an error).
 */
497 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
498 {
499  const char *s = *str, *start;
500  char c;
501 
502  *argno = -1;
503  *casei = -1;
504 
505  /* Scan to the next command, exit if there's no next command. */
506  for (; *s != '{'; s++) {
507  if (*s == '\0') return NULL;
508  }
509  s++; // Skip past the {
510 
/* A leading digit starts an explicit "<number>:" argument index. */
511  if (*s >= '0' && *s <= '9') {
512  char *end;
513 
514  *argno = strtoul(s, &end, 0);
515  if (*end != ':') strgen_fatal("missing arg #");
516  s = end + 1;
517  }
518 
519  /* parse command name */
520  start = s;
/* After this loop c is the character that ended the name and s points one past it. */
521  do {
522  c = *s++;
523  } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
524 
525  const CmdStruct *cmd = FindCmd(start, s - start - 1);
526  if (cmd == NULL) {
527  strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
528  return NULL;
529  }
530 
/* A '.' after the name introduces a case name that runs up to '}', ' ' or the end of the string. */
531  if (c == '.') {
532  const char *casep = s;
533 
534  if (!(cmd->flags & C_CASE)) {
535  strgen_fatal("Command '%s' can't have a case", cmd->cmd);
536  }
537 
538  do {
539  c = *s++;
540  } while (c != '}' && c != ' ' && c != '\0');
541  *casei = ResolveCaseName(casep, s - casep - 1);
542  }
543 
544  if (c == '\0') {
545  strgen_error("Missing } from command '%s'", start);
546  return NULL;
547  }
548 
549 
550  if (c != '}') {
/* Step back so that the '=' itself becomes the first character of the parameter text. */
551  if (c == '=') s--;
552  /* copy params */
553  start = s;
554  for (;;) {
555  c = *s++;
556  if (c == '}') break;
557  if (c == '\0') {
558  strgen_error("Missing } from command '%s'", start);
559  return NULL;
560  }
/* Stop before the parameter text would no longer fit together with its terminating NUL. */
561  if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
562  *param++ = c;
563  }
564  }
565  *param = '\0';
566 
567  *str = s;
568 
569  return cmd;
570 }
571 
/*
 * Create a reader for one file.
 * data is the string storage that HandleString looks strings up in and adds them to.
 * file is duplicated with stredup, so the reader keeps its own copy of the name.
 * master and translation are stored as given; master selects whether HandleString
 * defines new strings or attaches translations to existing ones.
 */
579 StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
580  data(data), file(stredup(file)), master(master), translation(translation)
581 {
582 }
583 
586 {
587  free(file);
588 }
589 
/*
 * Collect the commands that occur in a string.
 *
 * p:        receives the result and is cleared first. Commands that consume arguments are
 *           stored in p->cmd at their argument index; the other commands, except those flagged
 *           C_DONTCOUNT, are stored in p->pairs together with their parameter text.
 * s:        the string to scan.
 * warnings: not used by this function.
 *
 * NOTE(review): the parameter copies made with stredup are not released in the code visible here.
 */
590 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
591 {
592  char param[MAX_COMMAND_PARAM_SIZE];
593  int argno;
/* Index the next consuming command gets when it carries no explicit argument number. */
594  int argidx = 0;
595  int casei;
596 
597  memset(p, 0, sizeof(*p));
598 
599  for (;;) {
600  /* read until next command from a. */
601  const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
602 
603  if (ar == NULL) break;
604 
605  /* Sanity checking */
606  if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
607 
608  if (ar->consumes) {
/* An explicit argument number repositions the running index. */
609  if (argno != -1) argidx = argno;
610  if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
/* The same command may be stored at an index more than once; a different one may not. */
611  if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
612 
613  p->cmd[argidx++] = ar;
614  } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
615  if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
616  p->pairs[p->np].a = ar;
/* Only a non-empty parameter text is duplicated; otherwise a string literal is shared. */
617  p->pairs[p->np].v = param[0] != '\0' ? stredup(param) : "";
618  p->np++;
619  }
620  }
621 }
622 
623 
624 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
625 {
626  if (a == NULL) return NULL;
627 
628  if (strcmp(a->cmd, "STRING1") == 0 ||
629  strcmp(a->cmd, "STRING2") == 0 ||
630  strcmp(a->cmd, "STRING3") == 0 ||
631  strcmp(a->cmd, "STRING4") == 0 ||
632  strcmp(a->cmd, "STRING5") == 0 ||
633  strcmp(a->cmd, "STRING6") == 0 ||
634  strcmp(a->cmd, "STRING7") == 0 ||
635  strcmp(a->cmd, "RAW_STRING") == 0) {
636  return FindCmd("STRING", 6);
637  }
638 
639  return a;
640 }
641 
642 
/*
 * Check whether a language string uses the same commands as its template string.
 *
 * a:    the language (translated) string.
 * b:    the template string.
 * name: the name of the string, used in the warnings.
 *
 * Returns true when the commands match, or when the checks are disabled via _translation.
 * Every mismatch is reported with a warning; the check goes on after the first one.
 */
643 static bool CheckCommandsMatch(char *a, char *b, const char *name)
644 {
645  /* If we're not translating, i.e. we're compiling the base language,
646  * it is pointless to do all these checks as it'll always be correct.
647  * After all, all checks are based on the base language.
648  */
649  if (!_translation) return true;
650 
651  ParsedCommandStruct templ;
652  ParsedCommandStruct lang;
653  bool result = true;
654 
655  ExtractCommandString(&templ, b, true);
656  ExtractCommandString(&lang, a, true);
657 
658  /* For each string in templ, see if we find it in lang */
659  if (templ.np != lang.np) {
660  strgen_warning("%s: template string and language string have a different # of commands", name);
661  result = false;
662  }
663 
664  for (uint i = 0; i < templ.np; i++) {
665  /* see if we find it in lang, and zero it out */
666  bool found = false;
667  for (uint j = 0; j < lang.np; j++) {
/* Both the command and its parameter text have to be equal. */
668  if (templ.pairs[i].a == lang.pairs[j].a &&
669  strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
670  /* it was found in both. zero it out from lang so we don't find it again */
671  lang.pairs[j].a = NULL;
672  found = true;
673  break;
674  }
675  }
676 
677  if (!found) {
678  strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
679  result = false;
680  }
681  }
682 
683  /* The non consumer commands have been compared above.
684  * Check if the consumer commands match up also. */
685  for (uint i = 0; i < lengthof(templ.cmd); i++) {
/* Only the template command is mapped through TranslateCmdForCompare for the comparison itself. */
686  if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
687  strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
688  lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
689  templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
690  result = false;
691  }
692  }
693 
694  return result;
695 }
696 
/*
 * Handle a single line of a language file.
 *
 * Lines starting with '#' are pragmas ("##" not followed by a third '#') or are ignored;
 * lines starting with ';' or a space and empty lines are ignored as well.
 * Every other line has the form "NAME:text" or "NAME.case:text".
 *
 * For the master file the line defines a new string with the next free string ID.
 * For other files the text is attached to the existing string as its translation,
 * or as one of its cases, after its commands were checked against the English text.
 *
 * str: the line to handle; it is modified in place while it is split up.
 */
697 void StringReader::HandleString(char *str)
698 {
699  if (*str == '#') {
700  if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
701  return;
702  }
703 
704  /* Ignore comments & blank lines */
705  if (*str == ';' || *str == ' ' || *str == '\0') return;
706 
707  char *s = strchr(str, ':');
708  if (s == NULL) {
709  strgen_error("Line has no ':' delimiter");
710  return;
711  }
712 
713  char *t;
714  /* Trim spaces.
715  * After this str points to the command name, and s points to the command contents */
716  for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
717  *t = 0;
718  s++;
719 
720  /* Check string is valid UTF-8 */
721  const char *tmp;
722  for (tmp = s; *tmp != '\0';) {
723  size_t len = Utf8Validate(tmp);
724  if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
725 
726  WChar c;
727  Utf8Decode(&c, tmp);
728  if (c <= 0x001F || // ASCII control character range
729  c == 0x200B || // Zero width space
730  (c >= 0xE000 && c <= 0xF8FF) || // Private range
731  (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
732  strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
733  }
734 
735  tmp += len;
736  }
737 
738  /* Check if the string has a case..
739  * The syntax for cases is IDENTNAME.case */
740  char *casep = strchr(str, '.');
/* Cut the name at the '.'; casep then points to the case name. */
741  if (casep != NULL) *casep++ = '\0';
742 
743  /* Check if this string already exists.. */
744  LangString *ent = this->data.Find(str);
745 
746  if (this->master) {
747  if (casep != NULL) {
748  strgen_error("Cases in the base translation are not supported.");
749  return;
750  }
751 
752  if (ent != NULL) {
753  strgen_error("String name '%s' is used multiple times", str);
754  return;
755  }
756 
757  if (this->data.strings[this->data.next_string_id] != NULL) {
758  strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
759  return;
760  }
761 
762  /* Allocate a new LangString */
763  this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
764  } else {
765  if (ent == NULL) {
766  strgen_warning("String name '%s' does not exist in master file", str);
767  return;
768  }
769 
/* A second plain translation is an error; additional cases are allowed. */
770  if (ent->translated && casep == NULL) {
771  strgen_error("String name '%s' is used multiple times", str);
772  return;
773  }
774 
775  /* make sure that the commands match */
776  if (!CheckCommandsMatch(s, ent->english, str)) return;
777 
778  if (casep != NULL) {
/* The new case is put in front of the cases that are already known. */
779  ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
780  } else {
781  ent->translated = stredup(s);
782  /* If the string was translated, use the line from the
783  * translated language so errors in the translated file
784  * are properly referenced to. */
785  ent->line = _cur_line;
786  }
787  }
788 }
789 
791 {
792  if (!memcmp(str, "plural ", 7)) {
793  _lang.plural_form = atoi(str + 7);
794  if (_lang.plural_form >= lengthof(_plural_forms)) {
795  strgen_fatal("Invalid pluralform %d", _lang.plural_form);
796  }
797  } else {
798  strgen_fatal("unknown pragma '%s'", str);
799  }
800 }
801 
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * @param buf The NUL-terminated string to strip.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end != buf) {
		const char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}

	*end = '\0';
}
808 
810 {
811  char buf[2048];
812  _warnings = _errors = 0;
813 
814  _translation = this->master || this->translation;
815  _file = this->file;
816 
817  /* For each new file we parse, reset the genders, and language codes. */
818  MemSetT(&_lang, 0);
822 
823  _cur_line = 1;
824  while (this->ReadLine(buf, lastof(buf)) != NULL) {
825  rstrip(buf);
826  this->HandleString(buf);
827  _cur_line++;
828  }
829 }
830 
836 {
837  int last = 0;
838  for (size_t i = 0; i < data.max_strings; i++) {
839  if (data.strings[i] != NULL) {
840  this->WriteStringID(data.strings[i]->name, (int)i);
841  last = (int)i;
842  }
843  }
844 
845  this->WriteStringID("STR_LAST_STRINGID", last);
846 }
847 
848 static int TranslateArgumentIdx(int argidx, int offset)
849 {
850  int sum;
851 
852  if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
853  strgen_fatal("invalid argidx %d", argidx);
854  }
855  const CmdStruct *cs = _cur_pcs.cmd[argidx];
856  if (cs != NULL && cs->consumes <= offset) {
857  strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
858  }
859 
860  if (_cur_pcs.cmd[argidx] == NULL) {
861  strgen_fatal("no command for this argidx %d", argidx);
862  }
863 
864  for (int i = sum = 0; i < argidx; i++) {
865  const CmdStruct *cs = _cur_pcs.cmd[i];
866 
867  sum += (cs != NULL) ? cs->consumes : 1;
868  }
869 
870  return sum + offset;
871 }
872 
873 static void PutArgidxCommand(Buffer *buffer)
874 {
875  buffer->AppendUtf8(SCC_ARG_INDEX);
876  buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
877 }
878 
879 
/*
 * Write the encoded form of a string to the buffer.
 * Plain text is copied byte by byte; every "{...}" command is parsed and handed to the
 * emit function of its command. Encoding stops at the first command that cannot be parsed.
 *
 * buffer: the buffer to write to.
 * str:    the string to encode.
 *
 * Reads the commands of the English string from _cur_pcs and restarts _cur_argidx at 0.
 */
880 static void PutCommandString(Buffer *buffer, const char *str)
881 {
882  _cur_argidx = 0;
883 
884  while (*str != '\0') {
885  /* Process characters as they are until we encounter a { */
886  if (*str != '{') {
887  buffer->AppendByte(*str++);
888  continue;
889  }
890 
891  char param[MAX_COMMAND_PARAM_SIZE];
892  int argno;
893  int casei;
894  const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
895  if (cs == NULL) break;
896 
/* A case selection is written in front of the command it belongs to. */
897  if (casei != -1) {
898  buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
899  buffer->AppendByte(casei);
900  }
901 
902  /* For params that consume values, we need to handle the argindex properly */
903  if (cs->consumes > 0) {
904  /* Check if we need to output a move-param command */
905  if (argno != -1 && argno != _cur_argidx) {
906  _cur_argidx = argno;
907  PutArgidxCommand(buffer);
908  }
909 
910  /* Output the one from the master string... it's always accurate. */
911  cs = _cur_pcs.cmd[_cur_argidx++];
912  if (cs == NULL) {
913  strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
914  }
915  }
916 
/* Let the command write itself, using the parameter text from this string. */
917  cs->proc(buffer, param, cs->value);
918  }
919 }
920 
926 {
927  char buffer[2];
928  int offs = 0;
929  if (length >= 0x4000) {
930  strgen_fatal("string too long");
931  }
932 
933  if (length >= 0xC0) {
934  buffer[offs++] = (length >> 8) | 0xC0;
935  }
936  buffer[offs++] = length & 0xFF;
937  this->Write((byte*)buffer, offs);
938 }
939 
945 {
946  uint *in_use = AllocaM(uint, data.tabs);
947  for (size_t tab = 0; tab < data.tabs; tab++) {
948  uint n = data.CountInUse((uint)tab);
949 
950  in_use[tab] = n;
951  _lang.offsets[tab] = TO_LE16(n);
952 
953  for (uint j = 0; j != in_use[tab]; j++) {
954  const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
955  if (ls != NULL && ls->translated == NULL) _lang.missing++;
956  }
957  }
958 
959  _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
960  _lang.version = TO_LE32(data.Version());
961  _lang.missing = TO_LE16(_lang.missing);
962  _lang.winlangid = TO_LE16(_lang.winlangid);
963 
964  this->WriteHeader(&_lang);
965  Buffer buffer;
966 
967  for (size_t tab = 0; tab < data.tabs; tab++) {
968  for (uint j = 0; j != in_use[tab]; j++) {
969  const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
970  const Case *casep;
971  const char *cmdp;
972 
973  /* For undefined strings, just set that it's an empty string */
974  if (ls == NULL) {
975  this->WriteLength(0);
976  continue;
977  }
978 
979  _cur_ident = ls->name;
980  _cur_line = ls->line;
981 
982  /* Produce a message if a string doesn't have a translation. */
983  if (_show_todo > 0 && ls->translated == NULL) {
984  if ((_show_todo & 2) != 0) {
985  strgen_warning("'%s' is untranslated", ls->name);
986  }
987  if ((_show_todo & 1) != 0) {
988  const char *s = "<TODO> ";
989  while (*s != '\0') buffer.AppendByte(*s++);
990  }
991  }
992 
993  /* Extract the strings and stuff from the english command string */
994  ExtractCommandString(&_cur_pcs, ls->english, false);
995 
996  if (ls->translated_case != NULL || ls->translated != NULL) {
997  casep = ls->translated_case;
998  cmdp = ls->translated;
999  } else {
1000  casep = NULL;
1001  cmdp = ls->english;
1002  }
1003 
1004  _translated = cmdp != ls->english;
1005 
1006  if (casep != NULL) {
1007  const Case *c;
1008  uint num;
1009 
1010  /* Need to output a case-switch.
1011  * It has this format
1012  * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
1013  * Each LEN is printed using 2 bytes in big endian order. */
1014  buffer.AppendUtf8(SCC_SWITCH_CASE);
1015  /* Count the number of cases */
1016  for (num = 0, c = casep; c; c = c->next) num++;
1017  buffer.AppendByte(num);
1018 
1019  /* Write each case */
1020  for (c = casep; c != NULL; c = c->next) {
1021  buffer.AppendByte(c->caseidx);
1022  /* Make some space for the 16-bit length */
1023  uint pos = buffer.Length();
1024  buffer.AppendByte(0);
1025  buffer.AppendByte(0);
1026  /* Write string */
1027  PutCommandString(&buffer, c->string);
1028  buffer.AppendByte(0); // terminate with a zero
1029  /* Fill in the length */
1030  uint size = buffer.Length() - (pos + 2);
1031  buffer[pos + 0] = GB(size, 8, 8);
1032  buffer[pos + 1] = GB(size, 0, 8);
1033  }
1034  }
1035 
1036  if (cmdp != NULL) PutCommandString(&buffer, cmdp);
1037 
1038  this->WriteLength(buffer.Length());
1039  this->Write(buffer.Begin(), buffer.Length());
1040  buffer.Clear();
1041  }
1042  }
1043 }