[Lazarus] UTF8LengthFast returning incorrect results on AARCH64 (MacOS)
Bart
bartjunk64 at gmail.com
Tue Dec 28 13:26:56 CET 2021
On Tue, Dec 28, 2021 at 12:08 PM Martin Frb via lazarus
<lazarus at lists.lazarus-ide.org> wrote:
> I would like to see the generates assembler on M1, if that is possible? (for code with optimization off, as well as code with whatever optimization was used so far)
@Noel:
Here's example code (standalone) you can use to test both signed and
unsigned versions.
Save this code as ulen.pas
In order to get assembler output compile with:
fpc -al ulen.pas
This will produce the file ulen.s
You can attach or copy that here.
=============
program ulen;
{$mode objfpc}{$H+}
{$optimization off}
{$codepage utf8}
uses
SysUtils;
function UTF8LengthFast_Signed(p: PChar; ByteCount: PtrInt): PtrInt;
const
{$ifdef CPU32}
ONEMASK =$01010101;
EIGHTYMASK=$80808080;
{$endif}
{$ifdef CPU64}
ONEMASK =$0101010101010101;
EIGHTYMASK=$8080808080808080;
{$endif}
var
pnx: PPtrInt absolute p; // To get contents of text in PtrInt
blocks. x refers to 32 or 64 bits
pn8: pint8 absolute pnx; // To read text as Int8 in the initial and
final loops
ix: PtrInt absolute pnx; // To read text as PtrInt in the block loop
nx: PtrInt; // values processed in block loop
i,cnt,e: PtrInt;
begin
Result := 0;
e := ix+ByteCount; // End marker
// Handle any initial misaligned bytes.
cnt := (not (ix-1)) and (sizeof(PtrInt)-1);
if cnt>ByteCount then
cnt := ByteCount;
for i := 1 to cnt do
begin
// Is this byte NOT the first byte of a character?
//writeln('pn8^ = ',byte(pn8^).ToBinString);
//writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString);
//writeln('not pn8^ = ',Byte(not pn8^).ToBinString);
//writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString);
//writeln;
Result += (pn8^ shr 7) and ((not pn8^) shr 6);
inc(pn8);
end;
// Handle complete blocks
for i := 1 to (ByteCount-cnt) div sizeof(PtrUInt) do
begin
// Count bytes which are NOT the first byte of a character.
nx := ((pnx^ and EIGHTYMASK) shr 7) and ((not pnx^) shr 6);
{$push}{$overflowchecks off} // "nx * ONEMASK" causes an
arithmetic overflow.
Result += (nx * ONEMASK) >> ((sizeof(PtrInt) - 1) * 8);
{$pop}
inc(pnx);
end;
// Take care of any left-over bytes.
while ix<e do
begin
// Is this byte NOT the first byte of a character?
//writeln('pn8^ = ',byte(pn8^).ToBinString);
//writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString);
//writeln('not pn8^ = ',Byte(not pn8^).ToBinString);
//writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString);
//writeln;
//writeln('',);
Result += (pn8^ shr 7) and ((not pn8^) shr 6);
inc(pn8);
end;
Result := ByteCount - Result;
end;
function UTF8LengthFast_Unsigned(p: PChar; ByteCount: PtrInt): PtrInt;
const
{$ifdef CPU32}
ONEMASK =$01010101;
EIGHTYMASK=$80808080;
{$endif}
{$ifdef CPU64}
ONEMASK =$0101010101010101;
EIGHTYMASK=$8080808080808080;
{$endif}
var
pnx: PPtrUInt absolute p; // To get contents of text in PtrInt
blocks. x refers to 32 or 64 bits
pn8: puint8 absolute pnx; // To read text as Int8 in the initial and
final loops
ix: PtrUInt absolute pnx; // To read text as PtrInt in the block loop
nx: PtrUInt; // values processed in block loop
i,cnt,e: PtrInt;
begin
Result := 0;
e := ix+PtrUInt(ByteCount); // End marker
// Handle any initial misaligned bytes.
cnt := (not (ix-1)) and (sizeof(PtrInt)-1);
if cnt>ByteCount then
cnt := ByteCount;
for i := 1 to cnt do
begin
// Is this byte NOT the first byte of a character?
//writeln('pn8^ = ',byte(pn8^).ToBinString);
//writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString);
//writeln('not pn8^ = ',Byte(not pn8^).ToBinString);
//writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString);
//writeln;
Result += (pn8^ shr 7) and ((not pn8^) shr 6);
inc(pn8);
end;
// Handle complete blocks
for i := 1 to (ByteCount-cnt) div sizeof(PtrUInt) do
begin
// Count bytes which are NOT the first byte of a character.
nx := ((pnx^ and EIGHTYMASK) shr 7) and ((not pnx^) shr 6);
{$push}{$overflowchecks off} // "nx * ONEMASK" causes an
arithmetic overflow.
Result += (nx * ONEMASK) >> ((sizeof(PtrInt) - 1) * 8);
{$pop}
inc(pnx);
end;
// Take care of any left-over bytes.
while ix<e do
begin
// Is this byte NOT the first byte of a character?
//writeln('pn8^ = ',byte(pn8^).ToBinString);
//writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString);
//writeln('not pn8^ = ',Byte(not pn8^).ToBinString);
//writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString);
//writeln;
//writeln('',);
Result += (pn8^ shr 7) and ((not pn8^) shr 6);
inc(pn8);
end;
Result := ByteCount - Result;
end;
var
Len: PtrInt;
Euro: String;
begin
DefaultSystemCodePage := CP_UTF8;
Euro := '€';
writeln('Signed version');
Len := Utf8LengthFast_Signed(PChar(Euro), Length(Euro));
writeln('Len = ',Len);
writeln('Unsigned version');
Len := Utf8LengthFast_Unsigned(PChar(Euro), Length(Euro));
writeln('Len = ',Len);
end.
==========================
--
Bart
More information about the lazarus
mailing list