[103/125] gccrs: Improve parsing of raw byte string literals

Message ID 20240801145809.366388-105-arthur.cohen@embecosm.com
State Committed
Commit 3435f4e92b06747eb7d3777e8f60aa80bc495263
Headers
Series [001/125] Rust: Make 'tree'-level 'MAIN_NAME_P' work

Commit Message

Arthur Cohen Aug. 1, 2024, 2:57 p.m. UTC
  From: Owen Avery <powerboat9.gamer@gmail.com>

gcc/rust/ChangeLog:

	* lex/rust-lex.cc
	(Lexer::parse_raw_byte_string):
	Bring handling of edge cases up to par with parse_byte_string.

gcc/testsuite/ChangeLog:

	* rust/compile/raw-byte-string-loc.rs: New test.

Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>
---
 gcc/rust/lex/rust-lex.cc                      | 23 +++++++++++++++----
 .../rust/compile/raw-byte-string-loc.rs       |  6 +++++
 2 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/raw-byte-string-loc.rs
  

Patch

diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 9c2203160cd..7c37e83d6cb 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1840,14 +1840,18 @@  Lexer::parse_raw_byte_string (location_t loc)
   int length = 1;
   int hash_count = 0;
 
+  const location_t string_begin_locus = get_current_location ();
+
   // get hash count at beginnning
   skip_input ();
   current_char = peek_input ();
   length++;
+  current_column++;
   while (current_char == '#')
     {
       hash_count++;
       length++;
+      current_column++;
 
       skip_input ();
       current_char = peek_input ();
@@ -1862,6 +1866,7 @@  Lexer::parse_raw_byte_string (location_t loc)
   skip_input ();
   current_char = peek_input ();
   length++;
+  current_column++;
 
   while (true)
     {
@@ -1884,27 +1889,37 @@  Lexer::parse_raw_byte_string (location_t loc)
 	      skip_input (hash_count);
 	      current_char = peek_input ();
 	      length += hash_count + 1;
+	      current_column += hash_count + 1;
 	      break;
 	    }
 	}
-
-      if (current_char.value > 127)
+      else if (current_char.value > 127)
 	{
 	  rust_error_at (get_current_location (),
 			 "character %<%s%> in raw byte string out of range",
 			 current_char.as_string ().c_str ());
 	  current_char = 0;
 	}
+      else if (current_char.is_eof ())
+	{
+	  rust_error_at (string_begin_locus, "unended raw byte string literal");
+	  return Token::make (END_OF_FILE, get_current_location ());
+	}
 
       length++;
+      current_column++;
+      if (current_char == '\n')
+	{
+	  current_line++;
+	  current_column = 1;
+	  start_line (current_line, max_column_hint);
+	}
 
       str += current_char;
       skip_input ();
       current_char = peek_input ();
     }
 
-  current_column += length;
-
   loc += length - 1;
 
   str.shrink_to_fit ();
diff --git a/gcc/testsuite/rust/compile/raw-byte-string-loc.rs b/gcc/testsuite/rust/compile/raw-byte-string-loc.rs
new file mode 100644
index 00000000000..f37d3f9694d
--- /dev/null
+++ b/gcc/testsuite/rust/compile/raw-byte-string-loc.rs
@@ -0,0 +1,6 @@ 
+const X: &'static u8 = br#"12
+12"#;
+
+BREAK
+// { dg-error "unrecognised token" "" { target *-*-* } .-1 }
+// { dg-excess-errors "error 'failed to parse item' does not have location" }