4
Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,
6
https://bmstu.codes/lsx/simodo
9
#include "simodo/inout/token/InputStream.h"
19
const unsigned char bytes[size];
22
static constexpr UTF_BOM<3> UTF8_BOM = {
25
static constexpr UTF_BOM<2> UTF16BE_BOM = {
28
static constexpr UTF_BOM<2> UTF16LE_BOM = {
31
static constexpr UTF_BOM<4> UTF32BE_BOM = {
32
{0x00, 0x00, 0xFE, 0xFF}
34
static constexpr UTF_BOM<4> UTF32LE_BOM = {
35
{0xFF, 0xFE, 0x00, 0x00}
38
template <int bom_size>
39
static bool skipUtfBom(
41
, const UTF_BOM<bom_size> bom)
45
; is.peek() == bom.bytes[i] && i < bom_size
49
if (i == bom_size) return true;
60
namespace simodo::inout
63
char16_t InputStream::get()
65
if (_surrogate_pair != 0)
67
char16_t ch = _surrogate_pair;
73
return std::char_traits<char16_t>::eof();
77
skipUtfBom(_in, UTF8_BOM)
78
|| skipUtfBom(_in, UTF16BE_BOM)
79
|| skipUtfBom(_in, UTF16LE_BOM)
80
|| skipUtfBom(_in, UTF32BE_BOM)
81
|| skipUtfBom(_in, UTF32LE_BOM);
89
return static_cast<char16_t>(ch1);
94
if ((ch1 & 0b11100000) == 0b11000000)
97
code = (ch1 & 0b00011111);
99
else if ((ch1 & 0b11110000) == 0b11100000)
102
code = (ch1 & 0b00001111);
107
code = (ch1 & 0b00000111);
110
for(int i=1; i < count; ++i)
114
if (ch == std::char_traits<char16_t>::eof())
117
code = (code << 6) + (static_cast<uint32_t>(ch) & 0b00111111);
120
if (code <= 0xD7FF ||
121
((code >= 0xE000) && (code <= 0xFFFF)) )
123
return static_cast<char16_t>(code);
125
else if ((code >= 0xD800) && (code <= 0xDFFF))
127
// unicode replacement character
134
_surrogate_pair = 0xD800 + ((code >> 10) & 0x3FF);
135
return 0xDC00 + (code & 0x3FF);