@@ -153,12 +153,10 @@ def processPointers4Bytes(data, header):
153153 # Read four bytes as a quartet
154154 quartet = data [i :i + 4 ]
155155
156- # Get last two byte of the quartet
157- get2Bytes = quartet [2 :]
158-
159156 # Convert the final triplet to an integer
160- value = int .from_bytes (get2Bytes , byteorder = 'big' ) + header
157+ value = int .from_bytes (quartet , byteorder = 'big' ) + header
161158 result .append (value )
159+
162160 return result
163161
164162def extractTexts (romData , addressesList , lineBreakers , charTable ):
@@ -187,13 +185,14 @@ def extractTexts(romData, addressesList, lineBreakers, charTable):
187185 # Loop over each starting address in the list
188186 for addr in addressesList :
189187 text = bytearray ()
188+ decodedValidCharacter = False
190189
191190 while True :
192191 byte = romData [addr ]
193192 bytesLineCounter += 1
194193
195194 # If the byte is a line-breaker, stop extracting
196- if byte in lineBreakers :
195+ if byte in lineBreakers and decodedValidCharacter :
197196 breakerByte = byte
198197 break
199198
@@ -203,10 +202,12 @@ def extractTexts(romData, addressesList, lineBreakers, charTable):
203202 # If single character
204203 if len (char ) == 1 :
205204 text .append (ord (char ))
205+ decodedValidCharacter = True
206206 # If multiple characters (DTE/MTE)
207207 else :
208208 for c in char :
209209 text .append (ord (c ))
210+ decodedValidCharacter = True
210211 # If byte is not in charTable, print in format ~hex~
211212 else :
212213 hexValue = format (byte , '02X' )
@@ -243,6 +244,76 @@ def extractTexts(romData, addressesList, lineBreakers, charTable):
243244
244245 return texts , totalBytesRead , linesLength
245246
def extractTextsNoLineBreakers(romData, addressesList, endOffset, charTable):
    """
    Extracts texts from the ROM data when lines have no terminator byte.

    Each line spans from its own start address up to (but not including) the
    next address in addressesList; the last line ends at the final offset
    taken from endOffset. Neither addressesList nor endOffset is modified,
    so the same arguments can safely be reused by the caller.

    Parameters:
        romData (bytes): The complete ROM data.
        addressesList (list): Start addresses of the lines, in reading order.
        endOffset (set): Holds the final offset after the last line.
        charTable (dict): A dictionary mapping byte values to characters or sequences.

    Returns:
        tuple: Containing:
            - texts (list): Extracted script text.
            - totalBytesRead (int): Total text block size (sum of linesLength).
            - linesLength (list): Length of each line in bytes.

    Raises:
        KeyError: If endOffset is empty.
    """
    # Peek at the final offset instead of popping it, so the caller's set is
    # left intact. KeyError is kept as the failure type of an empty set.
    try:
        finalOffset = int(next(iter(endOffset)))
    except StopIteration:
        raise KeyError("endOffset is empty: the final text offset is required") from None

    # Work on a private copy so the caller's address list is not extended
    boundaries = [int(address) for address in addressesList]
    boundaries.append(finalOffset)

    texts = []
    linesLength = []

    # Each consecutive pair of boundaries delimits one line of text
    for startAddr, endAddr in zip(boundaries, boundaries[1:]):
        linesLength.append(endAddr - startAddr)

        text = bytearray()
        for addr in range(startAddr, endAddr):
            byte = romData[addr]

            # Map the byte using charTable to get the character(s)
            char = charTable.get(byte)
            if char:
                # Handles single characters and DTE/MTE sequences alike
                text.extend(ord(c) for c in char)
            else:
                # If byte is not in charTable, emit it in format ~hex~
                text.extend(f"~{byte:02X}~".encode('UTF-8'))

        # Convert byte array to string
        texts.append(text.decode('iso-8859-1', errors='replace'))

    # Total block size is the sum of all line lengths
    totalBytesRead = sum(linesLength)

    return texts, totalBytesRead, linesLength
316+
246317def parseLineBreakers (string ):
247318 """
248319 Parse a string of comma-separated hexadecimal values into a set of integers.
@@ -257,8 +328,11 @@ def parseLineBreakers(string):
257328 for byte in string .split (',' ):
258329 byte = byte .strip ()
259330 lineBreakers .add (int (byte , 16 ))
331+
332+ # If the lineBreaker are the final text offset
333+ is_offset = any (value > 255 for value in lineBreakers )
260334
261- return lineBreakers
335+ return lineBreakers , is_offset
262336
263337def formatHexString (hexString ):
264338 """
0 commit comments