-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathentities_print_table.c
More file actions
59 lines (57 loc) · 1.99 KB
/
entities_print_table.c
File metadata and controls
59 lines (57 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// How to look up an entity quickly?
//
// Looping through the whole entities table is the slowest option. To do better, we would need to do a
// binary search or build a hash table. A hash table without collisions realistically has a filling
// factor of only 10%, which is too memory-inefficient. Even a hash table with at most 3 collisions
// would have a filling factor of only 50%.
//
// Encoding all entities in a single string is the most memory-efficient option, and a lookup in that
// string is faster than a lookup in the entities table.
//
// The speed can be improved a lot by indexing by the first character of the entity.
//
// Size of entities struct: 157 kB
// Size of single entities string: 25 kB
// Size of entities array indexed by first character: 27 kB
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "entities.h"
// Writes `text` to stdout so that it can sit between the double quotes of a
// C string literal and still decode to the same bytes: double quote,
// backslash and newline are emitted as escape sequences, everything else
// is copied verbatim.
static void print_string_literal_body(const char *text)
{
    for (const char *p = text; *p != '\0'; ++p) {
        switch (*p) {
        case '"':
            fputs("\\\"", stdout);
            break;
        case '\\':
            fputs("\\\\", stdout);
            break;
        case '\n':
            fputs("\\n", stdout);
            break;
        default:
            putchar(*p);
            break;
        }
    }
}

// Generates, on stdout, the C source of a lookup table named ENTITIES:
// an array of strings indexed by the first character of the entity name.
// Each non-empty slot is one string built by concatenating "&" + entity +
// decoding for every row of the `entities` table (presumably declared in
// entities.h) whose entity starts with that character.
//
// Returns 0 on success, 1 if writing to stdout failed.
int main(void)
{
    const size_t entity_count = sizeof entities / sizeof *entities;

    fputs("static const char *ENTITIES[1 + (unsigned char) -1] = {\n", stdout);

    // `c` is an int so that it can be passed to "%c" directly; "%c" requires
    // an int argument and a wider integer type would be undefined behaviour.
    for (int c = 0; c <= (unsigned char) -1; ++c) {
        // Skip characters that no entity starts with, so the generated
        // initializer only lists the slots that are actually used.
        bool print_entries = false;
        for (size_t i = 0; i < entity_count; ++i) {
            if (entities[i].entity[0] == c) {
                print_entries = true;
                break;
            }
        }
        if (!print_entries) {
            continue;
        }

        printf(" ['%c'] =\n \"", c);

        // Approximate width of the current output line; used only to break
        // the literal into several adjacent literals for readability.
        size_t line_length = 0;
        for (size_t i = 0; i < entity_count; ++i) {
            if (entities[i].entity[0] != c) {
                continue;
            }
            if (line_length > 90) {
                // Close the current literal and open a new one on the next
                // line; adjacent string literals are concatenated by the
                // compiler, so the resulting string is unchanged.
                printf("\"\n \"");
                line_length = 0;
            }
            printf("&%s", entities[i].entity);
            // Compare by content, not by pointer: the decoding is escaped
            // character by character instead of testing it against a literal.
            print_string_literal_body(entities[i].decoding);
            line_length += strlen(entities[i].entity) + strlen(entities[i].decoding);
        }
        fputs("\",\n", stdout);
    }

    fputs("};\n", stdout);

    // The output is meant to be compiled, so a truncated write must not be
    // reported as success.
    if (fflush(stdout) != 0 || ferror(stdout)) {
        return 1;
    }
    return 0;
}