Henri Sivonen
Mozilla
Encoding
Decoder & Encoder
Encoding is the factory
let encoding: &'static Encoding =
Encoding::for_label( // by label
byte_slice_from_protocol
).unwrap_or(
WINDOWS_1252 // by named static
);
let decoder: Decoder =
encoding.new_decoder();
enum-Based Polymorphism
pub struct Decoder { // no vtable
variant: VariantDecoder,
// ...
}
/// The closed set of concrete decoder implementations.
///
/// `Decoder` stores one of these in its `variant` field, so the choice of
/// implementation is made through an enum rather than a trait object.
pub enum VariantDecoder { // no extensibility
/// Wraps a `SingleByteDecoder`.
SingleByte(SingleByteDecoder),
/// Wraps a `Utf8Decoder`.
Utf8(Utf8Decoder),
/// Wraps a `Gb18030Decoder`.
Gb18030(Gb18030Decoder),
// ...
}
/// Status value returned as the first tuple element of
/// `Decoder::decode_to_utf16_without_replacement`.
pub enum DecoderResult {
/// NOTE(review): presumably means all of `src` was consumed — confirm.
InputEmpty,
/// NOTE(review): presumably means `dst` had no room left — confirm.
OutputFull,
/// Carries two `u8` values; `decoder_result_to_u32` binds them as
/// `(bad, good)`. NOTE(review): their exact meaning is not shown here.
Malformed(u8, u8),
}
impl Decoder {
pub fn decode_to_utf16_without_replacement(
&mut self,
src: &[u8],
dst: &mut [u16],
last: bool
) -> (DecoderResult, usize, usize)
}
impl Encoding {
pub fn decode_without_bom_handling_and_without_replacement
<'a>(
&'static self,
bytes: &'a [u8],
) -> Option<Cow<'a, str>>
}
gsl::not_null<T>
static Singletons in Rust
pub static UTF_8_INIT: Encoding = Encoding {
name: "UTF-8",
variant: VariantEncoding::Utf8,
};
pub static UTF_8: &'static Encoding = &UTF_8_INIT;
static Singletons in FFI
pub struct ConstEncoding(*const Encoding);
unsafe impl Sync for ConstEncoding {}
#[no_mangle]
pub static UTF_8_ENCODING: ConstEncoding =
ConstEncoding(&UTF_8_INIT);
static Singletons in C++
class Encoding;
extern gsl::not_null<const encoding_rs::Encoding*>
const UTF_8_ENCODING;
Encoding as a C++ class
class Encoding final {
// ...
private:
Encoding() = delete;
Encoding(const Encoding&) = delete;
Encoding& operator=(const Encoding&) = delete;
~Encoding() = delete;
};
std::unique_ptr<T>
Rust | C++
let ptr: Box<Foo> | std::unique_ptr<Foo> ptr
Box::new(Foo::new(a, b, c)) | make_unique<Foo>(a, b, c)
Box::into_raw(ptr) | ptr.release()
let ptr = Box::from_raw(raw_ptr); | std::unique_ptr<Foo> ptr(raw_ptr);
impl Encoding {
pub fn new_decoder(&'static self) -> Decoder {
// ...
}
}
/// C entry point that creates a decoder for `encoding` and hands ownership
/// of it to the caller as a raw heap pointer.
///
/// The returned pointer comes from `Box::into_raw`; `decoder_free` turns it
/// back into a `Box` to release it.
///
/// # Safety
///
/// `encoding` is dereferenced without any check, so it must point to a valid
/// `Encoding` that lives for the rest of the program (`new_decoder` takes
/// `&'static self`).
#[no_mangle]
pub unsafe extern "C" fn encoding_new_decoder(
    encoding: *const Encoding,
) -> *mut Decoder {
    let encoding: &'static Encoding = &*encoding;
    let boxed = Box::new(encoding.new_decoder());
    Box::into_raw(boxed)
}
/// C entry point that destroys a decoder previously handed out as a raw
/// pointer.
///
/// The pointer is turned back into a `Box`, which is dropped immediately.
///
/// # Safety
///
/// `decoder` must satisfy the requirements of `Box::from_raw`: it must have
/// come from `Box::into_raw` (as in `encoding_new_decoder`) and must not be
/// used or freed again afterwards.
#[no_mangle]
pub unsafe extern "C" fn decoder_free(decoder: *mut Decoder) {
    drop(Box::from_raw(decoder));
}
class Encoding final {
public:
    // Creates a decoder through the FFI function encoding_new_decoder and
    // wraps the returned raw pointer in a std::unique_ptr so that ownership
    // is explicit on the C++ side.
    inline std::unique_ptr<Decoder> new_decoder() const
    {
        std::unique_ptr<Decoder> decoder(encoding_new_decoder(this));
        return decoder;
    }
};
// C++ view of a decoder object that is created and released through the FFI
// functions (encoding_new_decoder / decoder_free) rather than by C++ code.
class Decoder final {
public:
// Empty destructor: deleting a Decoder runs no C++-side cleanup here.
~Decoder() {}
// Class-specific deallocation function: `delete` on a Decoder* (for example
// from std::unique_ptr<Decoder>) forwards the pointer to decoder_free
// instead of the global operator delete.
static inline void operator delete(void* decoder)
{
decoder_free(reinterpret_cast<Decoder*>(decoder));
}
private:
// Construction, copy construction and copy assignment are all deleted, so
// C++ code cannot create or copy instances itself.
Decoder() = delete;
Decoder(const Decoder&) = delete;
Decoder& operator=(const Decoder&) = delete;
};
&self is Sugar
impl Foo {
pub fn get_val(&self) -> usize {
self.val
}
}
/// Asserts that method-call syntax (`bar.get_val()`) and the explicit
/// function-path form (`Foo::get_val(&bar)`) produce equal values.
fn test(bar: Foo) {
assert_eq!(bar.get_val(), Foo::get_val(&bar));
}
&self and this
Rust | C++
fn foo(&self, bar: usize) -> usize | size_t foo(size_t bar) const
fn foo(&mut self, bar: usize) -> usize | size_t foo(size_t bar)
nsISupports
QObject
std::optional<T>
Rust | C++
return None; | return std::nullopt;
return Some(foo); | return foo;
is_some() | operator bool(), has_value()
unwrap() | value()
unwrap_or(bar) | value_or(bar)
operator*() is unchecked!
std::tuple<Types...>
Rust | C++
fn foo() -> (T, U, V) | std::tuple<T, U, V> foo()
return (a, b, c); | return {a, b, c};
let (a, b, c) = foo(); | const auto [a, b, c] = foo();
let mut (a, b, c) = foo(); | auto [a, b, c] = foo();
gsl::span<T>
Rust | C++
src: &[u8] | const uint8_t* src, size_t src_len
dst: &mut [u8] | uint8_t* dst, size_t dst_len
src: &[u8] | gsl::span<const uint8_t> src
dst: &mut [u8] | gsl::span<uint8_t> dst
&mut vec[..] | gsl::make_span(vec)
std::slice::from_raw_parts(ptr, len) | gsl::make_span(ptr, len)
for item in slice {} | for (auto item : span) {}
slice[i] | span[i]
slice.len() | span.size()
slice.as_ptr() | span.data()
&slice[i..] | span.subspan(i)
&slice[..i] | span.subspan(0, i)
&slice[i..j] | span.subspan(i, j - i) 😭
mozilla::Span
Rust | C++
&slice[i..] | span.From(i)
&slice[..i] | span.To(i)
&slice[i..j] | span.FromTo(i, j)
std::string_view
std::u16string_view
gsl::span)
`p` must be non-null and aligned, even for zero-length slices, as is required for all references. However, for zero-length slices, `p` can be a bogus non-dereferencable pointer such as `NonNull::dangling()`.
This slide has been edited from the version shown at RustFest in order to avoid spreading out-of-date information.
// Returns `ptr` unchanged when it is non-null. For a null `ptr`, returns a
// non-null placeholder whose address equals alignof(T), i.e. a pointer that
// is suitably aligned for T but must never be dereferenced.
//
// Used before passing possibly-null span data to Rust, where
// slice::from_raw_parts requires a non-null, aligned pointer even for
// zero-length slices.
template <class T>
static inline T* null_to_bogus(T* ptr)
{
    if (ptr != nullptr) {
        return ptr;
    }
    return reinterpret_cast<T*>(alignof(T));
}
This slide has been edited from the version shown at RustFest in order to avoid spreading out-of-date information.
impl Encoding {
    /// Checks whether `buffer` begins with a byte order mark.
    ///
    /// Returns the matching encoding together with the BOM length in bytes:
    /// `(UTF_8, 3)` for `EF BB BF`, `(UTF_16LE, 2)` for `FF FE` and
    /// `(UTF_16BE, 2)` for `FE FF`. Returns `None` when `buffer` starts with
    /// none of these, including when it is too short.
    pub fn for_bom(buffer: &[u8]) -> Option<(&'static Encoding, usize)> {
        match buffer {
            [0xEF, 0xBB, 0xBF, ..] => Some((UTF_8, 3)),
            [0xFF, 0xFE, ..] => Some((UTF_16LE, 2)),
            [0xFE, 0xFF, ..] => Some((UTF_16BE, 2)),
            _ => None,
        }
    }
}
/// C entry point for `Encoding::for_bom`.
///
/// `*buffer_len` is read as the length of `buffer` on entry and overwritten
/// on exit: with the BOM length when a BOM is found, or with `0` otherwise.
/// Returns a pointer to the detected `Encoding`, or null when no BOM is
/// found.
///
/// # Safety
///
/// `buffer_len` must point to a readable and writable `usize`, and
/// `buffer` / `*buffer_len` must satisfy the requirements of
/// `std::slice::from_raw_parts`.
#[no_mangle]
pub unsafe extern "C" fn encoding_for_bom(
    buffer: *const u8,
    buffer_len: *mut usize,
) -> *const Encoding {
    let bytes = ::std::slice::from_raw_parts(buffer, *buffer_len);
    if let Some((found, bom_length)) = Encoding::for_bom(bytes) {
        *buffer_len = bom_length;
        found as *const Encoding
    } else {
        *buffer_len = 0;
        ::std::ptr::null()
    }
}
class Encoding final {
public:
    // Checks `buffer` for a byte order mark via the FFI function
    // encoding_for_bom. Returns the detected encoding together with the
    // length written back by encoding_for_bom, or std::nullopt when
    // encoding_for_bom returns a null pointer.
    static inline std::optional<
        std::tuple<gsl::not_null<const Encoding*>, size_t>>
    for_bom(gsl::span<const uint8_t> buffer)
    {
        // In/out parameter: buffer length on entry, overwritten by the call.
        size_t len = buffer.size();
        // The span's data pointer may be null; substitute a non-null aligned
        // placeholder before crossing into Rust.
        const Encoding* encoding =
            encoding_for_bom(null_to_bogus(buffer.data()), &len);
        if (!encoding) {
            return std::nullopt;
        }
        return std::make_tuple(
            gsl::not_null<const Encoding*>(encoding), len);
    }
};
std::variant<Types...>
enum
u32
pub const INPUT_EMPTY: u32 = 0;
pub const OUTPUT_FULL: u32 = 0xFFFFFFFF;
fn decoder_result_to_u32(result: DecoderResult) -> u32 {
match result {
DecoderResult::InputEmpty => INPUT_EMPTY,
DecoderResult::OutputFull => OUTPUT_FULL,
DecoderResult::Malformed(bad, good) =>
(good as u32) << 8) | (bad as u32),
}
}
Cows?
std::variant<std::string, std::string_view>
this is like &self
statics
gsl::not_null<T>
std::unique_ptr<T>
= delete;
static void operator delete(void*)
std::optional<T>
std::tuple<Types...>
gsl::span<T>
std::string_view
std::variant<Types...>
Cows
Code extracts from encoding_rs and encoding_c are Copyright Mozilla Foundation and are licensed under the Apache License, Version 2.0 <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your option.
reveal.js used under its MIT license. highlight.js used under its BSD license. Fira Sans and Fira Mono used under the Open Font License 1.1.
Other slide content Copyright Mozilla Foundation, licensed under the Creative Commons Attribution 4.0 International license.