Prusa Slicer 2.6.0
Loading...
Searching...
No Matches
encoding-check.cpp File Reference
#include <vector>
#include <iostream>
#include <fstream>
#include <cstdlib>
+ Include dependency graph for encoding-check.cpp:

Go to the source code of this file.

Functions

unsigned char * utf8_check (unsigned char *s)
 
int main (int argc, char const *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char const argv[] 
)
73{
74 if (argc != 3) {
75 std::cerr << "Usage: " << argv[0] << " <program/library> <file>" << std::endl;
76 return -1;
77 }
78
79 const char* target = argv[1];
80 const char* filename = argv[2];
81
82 const auto error_exit = [=](const char* error) {
83 std::cerr << "\n\tError: " << error << ": " << filename << "\n"
84 << "\tTarget: " << target << "\n"
85 << std::endl;
86 std::exit(-2);
87 };
88
89 std::ifstream file(filename, std::ios::binary | std::ios::ate);
90 const auto size = file.tellg();
91
92 if (size == 0) {
93 return 0;
94 }
95
96 file.seekg(0, std::ios::beg);
97 std::vector<char> buffer(size);
98
99 if (file.read(buffer.data(), size)) {
100 buffer.push_back('\0');
101
102 // Check UTF-8 validity
103 if (utf8_check(reinterpret_cast<unsigned char*>(buffer.data())) != nullptr) {
104 error_exit("Source file does not contain (valid) UTF-8");
105 }
106
107 // Check against a BOM mark
108 if (buffer.size() >= 3
109 && buffer[0] == '\xef'
110 && buffer[1] == '\xbb'
111 && buffer[2] == '\xbf') {
112 error_exit("Source file is valid UTF-8 but contains a BOM mark");
113 }
114 } else {
115 error_exit("Could not read source file");
116 }
117
118 return 0;
119}
unsigned char * utf8_check(unsigned char *s)
Definition encoding-check.cpp:26
constexpr auto size(const C &c) -> decltype(c.size())
Definition span.hpp:183
static char error[256]
Definition tga.cpp:50

References error, and utf8_check().

+ Here is the call graph for this function:

◆ utf8_check()

/// Checks that a NUL-terminated byte string is well-formed UTF-8.
///
/// The string is walked one encoded character at a time. The following are
/// rejected:
///   - bytes that cannot start a sequence (0x80..0xBF and 0xF8..0xFF),
///   - multi-byte sequences with a missing or malformed continuation byte,
///   - overlong encodings,
///   - UTF-16 surrogates (U+D800..U+DFFF),
///   - U+FFFE and U+FFFF,
///   - code points above U+10FFFF.
///
/// @param s  Pointer to the first byte of a NUL-terminated string; must not
///           be null. Scanning stops at the first NUL byte.
/// @return   nullptr if every sequence up to the terminator is accepted,
///           otherwise a pointer to the first byte of the first rejected
///           sequence.
unsigned char *utf8_check(unsigned char *s)
{
    // In every multi-byte branch the continuation-byte tests come first and
    // in order. A NUL terminator is not a continuation byte, so the ||-chain
    // short-circuits before any byte past the terminator is read.
    while (*s) {
        if (*s < 0x80) {
            // 0xxxxxxx
            s++;
        } else if ((s[0] & 0xe0) == 0xc0) {
            // 110xxxxx 10xxxxxx
            if ((s[1] & 0xc0) != 0x80 ||
                (s[0] & 0xfe) == 0xc0) { // overlong?
                return s;
            } else {
                s += 2;
            }
        } else if ((s[0] & 0xf0) == 0xe0) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || // overlong?
                (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || // surrogate?
                (s[0] == 0xef && s[1] == 0xbf &&
                 (s[2] & 0xfe) == 0xbe)) { // U+FFFE or U+FFFF?
                return s;
            } else {
                s += 3;
            }
        } else if ((s[0] & 0xf8) == 0xf0) {
            // 11110xxX 10xxxxxx 10xxxxxx 10xxxxxx
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                (s[3] & 0xc0) != 0x80 ||
                (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || // overlong?
                (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) { // > U+10FFFF?
                return s;
            } else {
                s += 4;
            }
        } else {
            // Stray continuation byte (0x80..0xBF) or invalid lead (0xF8..0xFF).
            return s;
        }
    }

    // Reached the terminator without finding an invalid sequence.
    return nullptr;
}

Referenced by main().

+ Here is the caller graph for this function: