SL-18330: Fix new C++ <-> Python LLSD compatibility tests.

When sending multiple LEAP packets in the same file (for testing convenience),
use a length prefix instead of delimiting with '\n'. Now that we allow a
serialization format that includes an LLSD format header (e.g.
"<?llsd/binary?>"), '\n' is part of the packet content. But in fact, testing
binary LLSD means we can't pick any delimiter guaranteed not to appear in the
packet content.

Using a length prefix also lets us pass a specific max_bytes to the subject
C++ LLSD parser.

Make llleap_test.cpp use new freestanding Python llsd package when available.
Update Python-side LEAP protocol code to work directly with encoded bytes
stream, avoiding bytes<->str encoding and decoding, which breaks binary LLSD.

Make LLSDSerialize::deserialize() recognize LLSD format header case-
insensitively. Python emits and checks for "llsd/binary", while LLSDSerialize
emits and checks for "LLSD/Binary". Once any of the headers is recognized,
pass corrected max_bytes to the specific parser.

Make deserialize() more careful about the no-header case: preserve '\n' in
content. Introduce debugging code (disabled) because it's a little tricky to
recreate.

Revert LLLeap child process stdout parser from LLSDSerialize::deserialize() to
the specific LLSDNotationParser(), as at present: the generic parser fails one
of LLLeap's integration tests for reasons that remain mysterious.
master
Nat Goodspeed 2022-12-02 15:17:56 -05:00
parent b180e4de23
commit 2f557cd7fa
4 changed files with 147 additions and 61 deletions

View File

@ -317,8 +317,17 @@ public:
LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got "
<< childout.size() << ", parsing LLSD" << LL_ENDL;
LLSD data;
#if 1
// specifically require notation LLSD from child
LLPointer<LLSDParser> parser(new LLSDNotationParser());
S32 parse_status(parser->parse(childout.get_istream(), data, mExpect));
if (parse_status == LLSDParser::PARSE_FAILURE)
#else
// SL-18330: accept any valid LLSD serialization format from child
// Unfortunately this runs into trouble we have not yet debugged.
bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect));
if (! parse_status)
#endif
{
bad_protocol("unparseable LLSD data");
}

View File

@ -123,12 +123,26 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)
bool fail_if_not_legacy = false;
/*
* Get the first line before anything.
* Get the first line before anything. Don't read more than max_bytes:
* this get() overload reads no more than (count-1) bytes into the
* specified buffer. In the usual case when max_bytes exceeds
* sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2.
*/
// Remember str's original input position: if there's no header, we'll
// want to back up and retry.
str.get(hdr_buf, sizeof(hdr_buf), '\n');
str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n');
auto inbuf = str.gcount();
// https://en.cppreference.com/w/cpp/io/basic_istream/get
// When the get() above sees the specified delimiter '\n', it stops there
// without pulling it from the stream. If it turns out that the stream
// does NOT contain a header, and the content includes meaningful '\n',
// it's important to pull that into hdr_buf too.
if (inbuf < max_bytes && str.get(hdr_buf[inbuf]))
{
// got the delimiting '\n'
++inbuf;
// None of the following requires that hdr_buf contain a final '\0'
// byte. We could store one if needed, since even the incremented
// inbuf won't exceed sizeof(hdr_buf)-1, but there's no need.
}
std::string header{ hdr_buf, inbuf };
if (str.fail())
{
@ -175,17 +189,17 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)
/*
* Create the parser as appropriate
*/
if (header == LLSD_BINARY_HEADER)
if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER))
{
return (parse_using<LLSDBinaryParser>(str, sd, max_bytes) > 0);
return (parse_using<LLSDBinaryParser>(str, sd, max_bytes-inbuf) > 0);
}
else if (header == LLSD_XML_HEADER)
else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER))
{
return (parse_using<LLSDXMLParser>(str, sd, max_bytes) > 0);
return (parse_using<LLSDXMLParser>(str, sd, max_bytes-inbuf) > 0);
}
else if (header == LLSD_NOTATION_HEADER)
else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER))
{
return (parse_using<LLSDNotationParser>(str, sd, max_bytes) > 0);
return (parse_using<LLSDNotationParser>(str, sd, max_bytes-inbuf) > 0);
}
else // no header we recognize
{
@ -207,7 +221,22 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)
LLMemoryStreamBuf already(reinterpret_cast<const U8*>(hdr_buf), inbuf);
cat_streambuf prebuff(&already, str.rdbuf());
std::istream prepend(&prebuff);
#if 1
return (p->parse(prepend, sd, max_bytes) > 0);
#else
// debugging the reconstituted 'prepend' stream
// allocate a buffer that we hope is big enough for the whole thing
std::vector<char> wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes);
prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size()));
LLMemoryStream replay(reinterpret_cast<const U8*>(wholemsg.data()), prepend.gcount());
auto success{ p->parse(replay, sd, prepend.gcount()) > 0 };
{
LL_DEBUGS() << (success? "parsed: $$" : "failed: '")
<< std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$"
<< LL_ENDL;
}
return success;
#endif
}
}

View File

@ -110,12 +110,12 @@ namespace tut
"import os\n"
"import sys\n"
"\n"
// Don't forget that this Python script is written to some
// temp directory somewhere! Its __file__ is useless in
// finding indra/lib/python. Use our __FILE__, with
// raw-string syntax to deal with Windows pathnames.
"mydir = os.path.dirname(r'" << __FILE__ << "')\n"
"from llbase import llsd\n"
"try:\n"
// new freestanding llsd package
" import llsd\n"
"except ImportError:\n"
// older llbase.llsd module
" from llbase import llsd\n"
"\n"
"class ProtocolError(Exception):\n"
" def __init__(self, msg, data):\n"
@ -126,26 +126,26 @@ namespace tut
" pass\n"
"\n"
"def get():\n"
" hdr = ''\n"
" while ':' not in hdr and len(hdr) < 20:\n"
" hdr += sys.stdin.read(1)\n"
" hdr = []\n"
" while b':' not in hdr and len(hdr) < 20:\n"
" hdr.append(sys.stdin.buffer.read(1))\n"
" if not hdr:\n"
" sys.exit(0)\n"
" if not hdr.endswith(':'):\n"
" if not hdr[-1] == b':':\n"
" raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n"
" try:\n"
" length = int(hdr[:-1])\n"
" length = int(b''.join(hdr[:-1]))\n"
" except ValueError:\n"
" raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n"
" parts = []\n"
" received = 0\n"
" while received < length:\n"
" parts.append(sys.stdin.read(length - received))\n"
" parts.append(sys.stdin.buffer.read(length - received))\n"
" received += len(parts[-1])\n"
" data = ''.join(parts)\n"
" data = b''.join(parts)\n"
" assert len(data) == length\n"
" try:\n"
" return llsd.parse(data.encode())\n"
" return llsd.parse(data)\n"
// Seems the old indra.base.llsd module didn't properly
// convert IndexError (from running off end of string) to
// LLSDParseError.
@ -185,11 +185,11 @@ namespace tut
" return _reply\n"
"\n"
"def put(req):\n"
" sys.stdout.write(':'.join((str(len(req)), req)))\n"
" sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n"
" sys.stdout.flush()\n"
"\n"
"def send(pump, data):\n"
" put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n"
" put(llsd.format_notation(dict(pump=pump, data=data)))\n"
"\n"
"def request(pump, data):\n"
" # we expect 'data' is a dict\n"

View File

@ -51,10 +51,11 @@ typedef U32 uint32_t;
#include "boost/phoenix/core/argument.hpp"
using namespace boost::phoenix;
#include "../llsd.h"
#include "../llsdserialize.h"
#include "llsd.h"
#include "llsdserialize.h"
#include "llsdutil.h"
#include "../llformat.h"
#include "llformat.h"
#include "llmemorystream.h"
#include "../test/lltut.h"
#include "../test/namedtempfile.h"
@ -1898,14 +1899,22 @@ namespace tut
static void writeLLSDArray(const FormatterFunction& serialize,
std::ostream& out, const LLSD& array)
{
BOOST_FOREACH(LLSD item, llsd::inArray(array))
for (const LLSD& item : llsd::inArray(array))
{
serialize(item, out);
// It's important to separate with newlines because Python's llsd
// module doesn't support parsing from a file stream, only from a
// string, so we have to know how much of the file to read into a
// string.
out << '\n';
// It's important to delimit the entries in this file somehow
// because, although Python's llsd.parse() can accept a file
// stream, the XML parser expects EOF after a single outer element
// -- it doesn't just stop. So we must extract a sequence of bytes
// strings from the file. But since one of the serialization
// formats we want to test is binary, we can't pick any single
// byte value as a delimiter! Use a binary integer length prefix
// instead.
std::ostringstream buffer;
serialize(item, buffer);
auto buffstr{ buffer.str() };
int bufflen{ static_cast<int>(buffstr.length()) };
out.write(reinterpret_cast<const char*>(&bufflen), sizeof(bufflen));
out.write(buffstr.c_str(), buffstr.length());
}
}
@ -1932,7 +1941,7 @@ namespace tut
" except StopIteration:\n"
" pass\n"
" else:\n"
" assert False, 'Too many data items'\n";
" raise AssertionError('Too many data items')\n";
// Create an llsdXXXXXX file containing 'data' serialized to
// notation.
@ -1948,10 +1957,23 @@ namespace tut
python("read C++ " + desc,
placeholders::arg1 <<
import_llsd <<
"def parse_each(iterable):\n"
" for item in iterable:\n"
" yield llsd.parse(item)\n" <<
pydata <<
"from functools import partial\n"
"import struct\n"
"lenformat = struct.Struct('i')\n"
"def parse_each(inf):\n"
" for rawlen in iter(partial(inf.read, lenformat.size), b''):\n"
" len = lenformat.unpack(rawlen)[0]\n"
// Since llsd.parse() has no max_bytes argument, instead of
// passing the input stream directly to parse(), read the item
// into a distinct bytes object and parse that.
" data = inf.read(len)\n"
" try:\n"
" yield llsd.parse(data)\n"
" except llsd.LLSDParseError as err:\n"
" print(f'*** {err}')\n"
" print(f'Bad content:\\n{data!r}')\n"
" raise\n"
<< pydata <<
// Don't forget raw-string syntax for Windows pathnames.
"verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n");
}
@ -2008,6 +2030,26 @@ namespace tut
}
|*==========================================================================*/
// helper for test<8> - test<12>
bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse)
{
// reset the output value for debugging clarity
item.clear();
// We use an int length prefix as a foolproof delimiter even for
// binary serialized streams.
int length{ 0 };
istr.read(reinterpret_cast<char*>(&length), sizeof(length));
// return parse(istr, item, length);
// Sadly, as of 2022-12-01 it seems we can't really trust our LLSD
// parsers to honor max_bytes: this test works better when we read
// each item into its own distinct LLMemoryStream, instead of passing
// the original istr with a max_bytes constraint.
std::vector<U8> buffer(length);
istr.read(reinterpret_cast<char*>(buffer.data()), length);
LLMemoryStream stream(buffer.data(), length);
return parse(stream, item, length);
}
// helper for test<8> - test<12>
void fromPythonUsing(const std::string& pyformatter,
const ParserFunction& parse=
@ -2022,6 +2064,8 @@ namespace tut
python("Python " + pyformatter,
placeholders::arg1 <<
import_llsd <<
"import struct\n"
"lenformat = struct.Struct('i')\n"
"DATA = [\n"
" 17,\n"
" 3.14,\n"
@ -2034,29 +2078,33 @@ namespace tut
// N.B. Using 'print' implicitly adds newlines.
"with open(r'" << file.getName() << "', 'wb') as f:\n"
" for item in DATA:\n"
" print(llsd." << pyformatter << "(item), file=f)\n");
" serialized = llsd." << pyformatter << "(item)\n"
" f.write(lenformat.pack(len(serialized)))\n"
" f.write(serialized)\n");
std::ifstream inf(file.getName().c_str());
LLSD item;
// Notice that we're not doing anything special to parse out the
// newlines: LLSDSerialize::fromNotation ignores them. While it would
// seem they're not strictly necessary, going in this direction, we
// want to ensure that notation-separated-by-newlines works in both
// directions -- since in practice, a given file might be read by
// either language.
ensure("Failed to read LLSD::Integer from Python",
parse(inf, item, LLSDSerialize::SIZE_UNLIMITED));
ensure_equals(item.asInteger(), 17);
ensure("Failed to read LLSD::Real from Python",
parse(inf, item, LLSDSerialize::SIZE_UNLIMITED));
ensure_approximately_equals("Bad LLSD::Real value from Python",
item.asReal(), 3.14, 7); // 7 bits ~= 0.01
ensure("Failed to read LLSD::String from Python",
parse(inf, item, LLSDSerialize::SIZE_UNLIMITED));
ensure_equals(item.asString(),
"This string\n"
"has several\n"
"lines.");
try
{
ensure("Failed to read LLSD::Integer from Python",
itemFromStream(inf, item, parse));
ensure_equals(item.asInteger(), 17);
ensure("Failed to read LLSD::Real from Python",
itemFromStream(inf, item, parse));
ensure_approximately_equals("Bad LLSD::Real value from Python",
item.asReal(), 3.14, 7); // 7 bits ~= 0.01
ensure("Failed to read LLSD::String from Python",
itemFromStream(inf, item, parse));
ensure_equals(item.asString(),
"This string\n"
"has several\n"
"lines.");
}
catch (const tut::failure& err)
{
std::cout << "for " << err.what() << ", item = " << item << std::endl;
throw;
}
}
template<> template<>