Skip to content

Commit 1040e1c

Browse files
committed
Fix non-ASCII chars in Rust byte array
1 parent 54b6f76 commit 1040e1c

40 files changed

Lines changed: 225 additions & 141 deletions

cpp2rust/converter/converter.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,8 +1700,10 @@ bool Converter::VisitFloatingLiteral(clang::FloatingLiteral *expr) {
17001700
}
17011701

17021702
bool Converter::VisitCharacterLiteral(clang::CharacterLiteral *expr) {
1703-
std::string ch = GetEscapedCharLiteral(expr->getValue());
1704-
ch = "'" + std::move(ch) + "'";
1703+
auto uc = static_cast<unsigned char>(expr->getValue());
1704+
std::string ch = uc > 0x7F
1705+
? std::format("'\\u{{{:x}}}'", uc)
1706+
: "'" + GetEscapedCharLiteral(expr->getValue()) + "'";
17051707
{
17061708
PushParen paren(*this);
17071709
StrCat(ch, keyword::kAs, ToStringBase(expr->getType()));
@@ -1710,7 +1712,8 @@ bool Converter::VisitCharacterLiteral(clang::CharacterLiteral *expr) {
17101712
return false;
17111713
}
17121714

1713-
std::string Converter::GetEscapedCharLiteral(char character) const {
1715+
std::string Converter::GetEscapedCharLiteral(char character,
1716+
bool byte_string) const {
17141717
switch (character) {
17151718
case '"':
17161719
return "\\\"";
@@ -1728,7 +1731,7 @@ std::string Converter::GetEscapedCharLiteral(char character) const {
17281731
return "\\0";
17291732
}
17301733
auto uc = static_cast<unsigned char>(character);
1731-
if (uc < 0x20 || uc >= 0x7F) {
1734+
if (uc < 0x20 || uc == 0x7F || (byte_string && uc > 0x7F)) {
17321735
return std::format("\\x{:02x}", uc);
17331736
}
17341737
return std::string(1, character);
@@ -1747,14 +1750,15 @@ std::string Converter::GetEscapedUTF8CharLiteral(clang::Expr *expr) const {
17471750
}
17481751

17491752
std::string Converter::GetEscapedStringLiteral(clang::Expr *expr,
1750-
uint64_t pad_nulls) const {
1753+
uint64_t pad_nulls,
1754+
bool byte_string) const {
17511755
auto str_expr = clang::dyn_cast<clang::StringLiteral>(expr->IgnoreCasts());
17521756
assert(str_expr);
17531757
auto raw = str_expr->getString();
17541758
std::string out;
17551759
out.push_back('"');
17561760
for (unsigned char c : raw) {
1757-
out += GetEscapedCharLiteral(static_cast<char>(c));
1761+
out += GetEscapedCharLiteral(static_cast<char>(c), byte_string);
17581762
}
17591763
for (uint64_t i = 0; i < pad_nulls; ++i) {
17601764
out += "\\0";
@@ -1775,12 +1779,12 @@ bool Converter::VisitStringLiteral(clang::StringLiteral *expr) {
17751779
? arr_size - expr->getString().size()
17761780
: 0;
17771781
StrCat(token::kStar,
1778-
std::format("b{}", GetEscapedStringLiteral(expr, pad)));
1782+
std::format("b{}", GetEscapedStringLiteral(expr, pad, true)));
17791783
return false;
17801784
}
17811785
StrCat(token::kStar);
17821786
}
1783-
StrCat(std::format("b{}", GetEscapedStringLiteral(expr, 1)));
1787+
StrCat(std::format("b{}", GetEscapedStringLiteral(expr, 1, true)));
17841788
return false;
17851789
}
17861790

cpp2rust/converter/converter.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,12 +240,13 @@ class Converter : public clang::RecursiveASTVisitor<Converter> {
240240

241241
virtual bool VisitCharacterLiteral(clang::CharacterLiteral *expr);
242242

243-
std::string GetEscapedCharLiteral(char character) const;
243+
std::string GetEscapedCharLiteral(char character,
244+
bool byte_string = false) const;
244245

245246
std::string GetEscapedUTF8CharLiteral(clang::Expr *expr) const;
246247

247-
std::string GetEscapedStringLiteral(clang::Expr *expr,
248-
uint64_t pad_nulls = 0) const;
248+
std::string GetEscapedStringLiteral(clang::Expr *expr, uint64_t pad_nulls = 0,
249+
bool byte_string = false) const;
249250
virtual bool VisitStringLiteral(clang::StringLiteral *expr);
250251

251252
virtual bool VisitCXXBoolLiteralExpr(clang::CXXBoolLiteralExpr *expr);

cpp2rust/converter/models/converter_refcount.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,10 +1023,10 @@ bool ConverterRefCount::VisitStringLiteral(clang::StringLiteral *expr) {
10231023
: 0;
10241024
}
10251025
StrCat(std::format("Box::<[u8]>::from(b{}.as_slice())",
1026-
GetEscapedStringLiteral(expr, pad)));
1026+
GetEscapedStringLiteral(expr, pad, true)));
10271027
return false;
10281028
}
1029-
StrCat(GetEscapedStringLiteral(expr));
1029+
StrCat(std::format("b{}", GetEscapedStringLiteral(expr, 0, true)));
10301030
return false;
10311031
}
10321032

libcc2rs/src/rc.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -940,9 +940,10 @@ impl fmt::Display for Ptr<u8> {
940940
}
941941
}
942942

943+
type StringLiteralMap = HashMap<&'static [u8], Rc<RefCell<Vec<u8>>>>;
944+
943945
thread_local! {
944-
static STRING_LITERALS: RefCell<HashMap<&'static str, Rc<RefCell<Vec<u8>>>>> =
945-
RefCell::new(HashMap::new());
946+
static STRING_LITERALS: RefCell<StringLiteralMap> = RefCell::new(HashMap::new());
946947
}
947948

948949
impl Ptr<u8> {
@@ -1020,12 +1021,12 @@ impl Ptr<u8> {
10201021
}
10211022

10221023
#[inline]
1023-
pub fn from_string_literal(s: &'static str) -> Self {
1024+
pub fn from_string_literal(s: &'static [u8]) -> Self {
10241025
STRING_LITERALS.with(|literals| {
10251026
let mut literals = literals.borrow_mut();
10261027
let weak = Rc::downgrade(literals.entry(s).or_insert_with(|| {
10271028
Rc::new(RefCell::new({
1028-
let mut v = s.as_bytes().to_vec();
1029+
let mut v = s.to_vec();
10291030
v.push(0);
10301031
v
10311032
}))

tests/unit/out/refcount/bool_condition_logical.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ fn main_0() -> i32 {
127127
if ((*n.borrow()) != 0) || (((*bits.borrow()) & 256_i64) != 0) {
128128
assert!(true);
129129
}
130-
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal("hi")));
130+
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal(b"hi")));
131131
let cnp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::<u8>::null()));
132132
if ((*x.borrow()) > (*y.borrow())) && (!(*cp.borrow()).is_null()) {
133133
assert!(true);

tests/unit/out/refcount/bool_condition_logical_c.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ fn main_0() -> i32 {
152152
{
153153
assert!((1 != 0));
154154
}
155-
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal("hi")));
155+
let cp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::from_string_literal(b"hi")));
156156
let cnp: Value<Ptr<u8>> = Rc::new(RefCell::new(Ptr::<u8>::null()));
157157
if (((((((*x.borrow()) > (*y.borrow())) as i32) != 0) && (!(*cp.borrow()).is_null())) as i32)
158158
!= 0)

tests/unit/out/refcount/char_printing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fn main_0() -> i32 {
1313
let vec_: Value<Vec<u8>> = Rc::new(RefCell::new(vec![195_u8, 167_u8]));
1414
let i: Value<i32> = Rc::new(RefCell::new(27));
1515
let str: Value<Vec<u8>> = Rc::new(RefCell::new(
16-
Ptr::from_string_literal("rdas.")
16+
Ptr::from_string_literal(b"rdas.")
1717
.to_c_string_iterator()
1818
.chain(std::iter::once(0))
1919
.collect::<Vec<u8>>(),

tests/unit/out/refcount/char_printing_cerr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fn main_0() -> i32 {
1313
let vec_: Value<Vec<u8>> = Rc::new(RefCell::new(vec![195_u8, 167_u8]));
1414
let i: Value<i32> = Rc::new(RefCell::new(27));
1515
let str: Value<Vec<u8>> = Rc::new(RefCell::new(
16-
Ptr::from_string_literal("rdas.")
16+
Ptr::from_string_literal(b"rdas.")
1717
.to_c_string_iterator()
1818
.chain(std::iter::once(0))
1919
.collect::<Vec<u8>>(),

tests/unit/out/refcount/default_in_statics.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ thread_local!(
111111
);
112112
thread_local!(
113113
pub static static_foo_3: Value<Foo> = Rc::new(RefCell::new(Foo {
114-
s1: Rc::new(RefCell::new(Ptr::from_string_literal("hello"))),
114+
s1: Rc::new(RefCell::new(Ptr::from_string_literal(b"hello"))),
115115
s2: Rc::new(RefCell::new(Ptr::<u8>::null())),
116116
fn1: Rc::new(RefCell::new(FnPtr::null())),
117117
fn2: Rc::new(RefCell::new(FnPtr::null())),
@@ -121,14 +121,14 @@ thread_local!(
121121
thread_local!(
122122
pub static static_foo_array_4: Value<Box<[Foo]>> = Rc::new(RefCell::new(Box::new([
123123
Foo {
124-
s1: Rc::new(RefCell::new(Ptr::from_string_literal("first"))),
124+
s1: Rc::new(RefCell::new(Ptr::from_string_literal(b"first"))),
125125
s2: Rc::new(RefCell::new(Ptr::<u8>::null())),
126126
fn1: Rc::new(RefCell::new(FnPtr::null())),
127127
fn2: Rc::new(RefCell::new(FnPtr::null())),
128128
n: Rc::new(RefCell::new(1)),
129129
},
130130
Foo {
131-
s1: Rc::new(RefCell::new(Ptr::from_string_literal("second"))),
131+
s1: Rc::new(RefCell::new(Ptr::from_string_literal(b"second"))),
132132
s2: Rc::new(RefCell::new(Ptr::<u8>::null())),
133133
fn1: Rc::new(RefCell::new(FnPtr::null())),
134134
fn2: Rc::new(RefCell::new(FnPtr::null())),

tests/unit/out/refcount/enum_int_interop.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,17 @@ thread_local!(
9191
thread_local!(
9292
pub static entries_3: Value<Box<[Entry]>> = Rc::new(RefCell::new(Box::new([
9393
Entry {
94-
name: Rc::new(RefCell::new(Ptr::from_string_literal("first"))),
94+
name: Rc::new(RefCell::new(Ptr::from_string_literal(b"first"))),
9595
color: Rc::new(RefCell::new(Color::RED)),
9696
opt: Rc::new(RefCell::new(Option::OPT_NONE)),
9797
},
9898
Entry {
99-
name: Rc::new(RefCell::new(Ptr::from_string_literal("second"))),
99+
name: Rc::new(RefCell::new(Ptr::from_string_literal(b"second"))),
100100
color: Rc::new(RefCell::new(Color::GREEN)),
101101
opt: Rc::new(RefCell::new(Option::OPT_A)),
102102
},
103103
Entry {
104-
name: Rc::new(RefCell::new(Ptr::from_string_literal("third"))),
104+
name: Rc::new(RefCell::new(Ptr::from_string_literal(b"third"))),
105105
color: Rc::new(RefCell::new(Color::BLUE)),
106106
opt: Rc::new(RefCell::new(Option::OPT_C)),
107107
},

0 commit comments

Comments
 (0)