Ark Server API (ASA) - Wiki

nlohmann::detail::lexer< BasicJsonType, InputAdapterType > Class Template Reference

lexical analysis

#include <json.hpp>
Public Types
  using token_type = typename lexer_base<BasicJsonType>::token_type

Public Types inherited from nlohmann::detail::lexer_base< BasicJsonType >
  enum class token_type { uninitialized, literal_true, literal_false, literal_null, value_string, value_unsigned, value_integer, value_float, begin_array, begin_object, end_array, end_object, name_separator, value_separator, parse_error, end_of_input, literal_or_value }
    token types for the parser
Public Member Functions
  lexer (InputAdapterType &&adapter, bool ignore_comments_=false) noexcept
  lexer (const lexer &)=delete
  lexer (lexer &&)=default
  lexer & operator= (lexer &)=delete
  lexer & operator= (lexer &&)=default
  ~lexer ()=default
  constexpr number_integer_t get_number_integer () const noexcept
    return integer value
  constexpr number_unsigned_t get_number_unsigned () const noexcept
    return unsigned integer value
  constexpr number_float_t get_number_float () const noexcept
    return floating-point value
  string_t & get_string ()
    return current string value (implicitly resets the token; useful only once)
  constexpr position_t get_position () const noexcept
    return position of last read token
  std::string get_token_string () const
  JSON_HEDLEY_RETURNS_NON_NULL constexpr const char * get_error_message () const noexcept
    return syntax error message
  bool skip_bom ()
    skip the UTF-8 byte order mark
  void skip_whitespace ()
  token_type scan ()
Private Types
  using number_integer_t = typename BasicJsonType::number_integer_t
  using number_unsigned_t = typename BasicJsonType::number_unsigned_t
  using number_float_t = typename BasicJsonType::number_float_t
  using string_t = typename BasicJsonType::string_t
  using char_type = typename InputAdapterType::char_type
  using char_int_type = typename std::char_traits<char_type>::int_type
Private Member Functions
  int get_codepoint ()
    get codepoint from 4 hex characters following \u
  bool next_byte_in_range (std::initializer_list< char_int_type > ranges)
    check if the next byte(s) are inside a given range
  token_type scan_string ()
    scan a string literal
  bool scan_comment ()
    scan a comment
  token_type scan_number ()
    scan a number literal
  token_type scan_literal (const char_type *literal_text, const std::size_t length, token_type return_type)
  void reset () noexcept
    reset token_buffer; current character is beginning of token
  char_int_type get ()
  void unget ()
    unget current character (read it again on next get)
  void add (char_int_type c)
    add a character to token_buffer
Static Private Member Functions
  static JSON_HEDLEY_PURE char get_decimal_point () noexcept
    return the locale-dependent decimal point
  static void strtof (float &f, const char *str, char **endptr) noexcept
  static void strtof (double &f, const char *str, char **endptr) noexcept
  static void strtof (long double &f, const char *str, char **endptr) noexcept
Private Attributes
  InputAdapterType ia
    input adapter
  const bool ignore_comments = false
    whether comments should be ignored (true) or signaled as errors (false)
  char_int_type current = std::char_traits<char_type>::eof()
    the current character
  bool next_unget = false
    whether the next get() call should just return current
  position_t position {}
    the start position of the current token
  std::vector< char_type > token_string {}
    raw input token string (for error messages)
  string_t token_buffer {}
    buffer for variable-length tokens (numbers, strings)
  const char * error_message = ""
    a description of the lexer error that occurred
  number_integer_t value_integer = 0
  number_unsigned_t value_unsigned = 0
  number_float_t value_float = 0
  const char_int_type decimal_point_char = '.'
    the decimal point
Additional Inherited Members

Static Public Member Functions inherited from nlohmann::detail::lexer_base< BasicJsonType >
  JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name (const token_type t) noexcept
    return name of values of type token_type (only used for errors)
lexical analysis
This class organizes the lexical analysis during JSON deserialization.
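The lexer is an implementation detail of the library; at the user level its work surfaces through functions such as nlohmann::json::parse and nlohmann::json::accept. A minimal sketch of observing it indirectly, assuming the single-header json.hpp is on the include path (the exact path depends on the installation):

```cpp
#include <json.hpp>   // single-header nlohmann/json; the include path may differ per installation
#include <cstdio>

int main()
{
    // accept() runs the parser (and therefore this lexer) without building a value,
    // so it is a cheap way to exercise lexical and syntactic validation.
    std::printf("%d\n", nlohmann::json::accept(R"({"pi": 3.14159})"));  // 1: valid JSON
    std::printf("%d\n", nlohmann::json::accept("{pi: 3.14159}"));       // 0: unquoted key fails during scanning
}
```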
Member Typedef Documentation
using nlohmann::detail::lexer< BasicJsonType, InputAdapterType >::token_type = typename lexer_base<BasicJsonType>::token_type |
Member Function Documentation
get codepoint from 4 hex characters following \u
For the input "\u c1 c2 c3 c4" the codepoint is: (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0)
Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' must be converted to the integers 0x0..0x9, 0xA..0xF, and 0xA..0xF, respectively. The conversion is done by subtracting the offset (0x30, 0x37, or 0x57) between the ASCII value of the character and the desired integer value.
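A minimal, self-contained sketch of the offset-subtraction scheme described above; hex_digit_value and codepoint_from_hex4 are hypothetical helpers for illustration, not part of the library:

```cpp
#include <cstdio>

// map '0'..'9', 'A'..'F', 'a'..'f' to 0x0..0xF by subtracting 0x30, 0x37, or 0x57
int hex_digit_value(char c)
{
    if (c >= '0' && c <= '9') return c - 0x30;
    if (c >= 'A' && c <= 'F') return c - 0x37;
    if (c >= 'a' && c <= 'f') return c - 0x57;
    return -1;  // not a hex digit
}

// combine the 4 hex digits after "\u" as (c1 << 12) + (c2 << 8) + (c3 << 4) + c4
int codepoint_from_hex4(const char* p)
{
    int codepoint = 0;
    for (int i = 0; i < 4; ++i)
    {
        const int v = hex_digit_value(p[i]);
        if (v < 0) return -1;  // signal an error, as get_codepoint() does for non-hex input
        codepoint = (codepoint << 4) + v;
    }
    return codepoint;          // 0x0000..0xFFFF
}

int main()
{
    std::printf("%#x\n", static_cast<unsigned>(codepoint_from_hex4("00e9")));  // prints 0xe9
}
```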
check if the next byte(s) are inside a given range
Adds the current byte and, for each passed range, reads a new byte and checks whether it lies inside the range. If a violation is detected, an error message is set up and false is returned; otherwise, true is returned.
Parameters: ranges (in): list of integers, interpreted as a list of pairs of inclusive lower and upper bounds.
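A sketch of the pairs-of-bounds convention; bytes_in_ranges is a hypothetical stand-in for illustration, not the library's member function:

```cpp
#include <cstdio>
#include <initializer_list>

// Each consecutive pair in `ranges` is an inclusive [lo, hi] interval that
// one further input byte must fall into.
bool bytes_in_ranges(const unsigned char* next, std::initializer_list<int> ranges)
{
    auto it = ranges.begin();
    while (it != ranges.end())
    {
        const int lo = *it++;
        const int hi = *it++;   // the list must hold an even number of elements
        const int byte = *next++;
        if (byte < lo || byte > hi)
        {
            return false;       // range violation, analogous to next_byte_in_range() failing
        }
    }
    return true;
}

int main()
{
    // 0xC3 0xA9 is the UTF-8 encoding of U+00E9; after reading 0xC3 a scanner
    // would require exactly one continuation byte in the range 0x80..0xBF.
    const unsigned char utf8[] = { 0xC3, 0xA9 };
    std::printf("%s\n", bytes_in_ranges(utf8 + 1, {0x80, 0xBF}) ? "ok" : "violation");
}
```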
scan a number literal
This function scans a number according to Sect. 6 of RFC 8259.
The function is realized with a deterministic finite state machine derived from the grammar described in RFC 8259. Starting in state "init", the input is read and used to determine the next state. Only state "done" accepts the number. State "error" is a trap state to model errors. In the table below, "anything" means any character except the ones listed before it.

state    | 0        | 1-9      | e E      | +       | -       | .        | anything
---------|----------|----------|----------|---------|---------|----------|---------
init     | zero     | any1     | [error]  | [error] | minus   | [error]  | [error]
minus    | zero     | any1     | [error]  | [error] | [error] | [error]  | [error]
zero     | done     | done     | exponent | done    | done    | decimal1 | done
any1     | any1     | any1     | exponent | done    | done    | decimal1 | done
decimal1 | decimal2 | decimal2 | [error]  | [error] | [error] | [error]  | [error]
decimal2 | decimal2 | decimal2 | exponent | done    | done    | done     | done
exponent | any2     | any2     | [error]  | sign    | sign    | [error]  | [error]
sign     | any2     | any2     | [error]  | [error] | [error] | [error]  | [error]
any2     | any2     | any2     | done     | done    | done    | done     | done

The state machine is realized with one label per state (prefixed with "scan_number_") and goto statements between them. The state machine contains cycles, but any cycle can be left when EOF is read. Therefore, the function is guaranteed to terminate.
During scanning, the read bytes are stored in token_buffer. This string is then converted to a signed integer, an unsigned integer, or a floating-point number.
Note: while scanning, the decimal point character '.' is replaced by the locale-dependent decimal point (see get_decimal_point()) so that the buffered number works with the locale-dependent converters.
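A hedged sketch of the final conversion step only (not the goto-based state machine itself); number_token and convert_number are hypothetical names for illustration, and the real lexer additionally stores the converted value and handles overflow in more detail:

```cpp
#include <cerrno>
#include <cstdlib>
#include <cstdio>
#include <string>

enum class number_token { value_unsigned, value_integer, value_float, parse_error };

// Try the scanned bytes as unsigned integer, signed integer, then floating point,
// mirroring the three numeric token types the lexer can emit.
number_token convert_number(const std::string& token_buffer)
{
    if (token_buffer.empty())
        return number_token::parse_error;

    const char* begin = token_buffer.c_str();
    char* end = nullptr;
    errno = 0;

    if (token_buffer[0] != '-')
    {
        std::strtoull(begin, &end, 10);               // non-negative: try unsigned first
        if (errno == 0 && end == begin + token_buffer.size())
            return number_token::value_unsigned;
    }
    else
    {
        std::strtoll(begin, &end, 10);                // leading '-': try signed integer
        if (errno == 0 && end == begin + token_buffer.size())
            return number_token::value_integer;
    }

    // anything else ('.', 'e'/'E', or an out-of-range integer) becomes a float
    errno = 0;
    std::strtod(begin, &end);
    return (end == begin + token_buffer.size()) ? number_token::value_float
                                                : number_token::parse_error;
}

int main()
{
    std::printf("%d\n", static_cast<int>(convert_number("123")));    // value_unsigned
    std::printf("%d\n", static_cast<int>(convert_number("-42")));    // value_integer
    std::printf("%d\n", static_cast<int>(convert_number("3.14e2"))); // value_float (assumes "C" locale decimal point)
}
```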
scan a string literal
This function scans a string according to Sect. 7 of RFC 8259. While scanning, escape sequences are decoded and the resulting bytes are copied into the buffer token_buffer. When the function returns successfully, token_buffer is not null-terminated (as it may contain \0 bytes), and token_buffer.size() is the number of bytes in the string.
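A usage-level illustration of why token_buffer.size() matters: decoded strings may contain embedded \0 bytes, so their length cannot be derived from null termination. Assuming the single-header json.hpp is available on the include path:

```cpp
#include <json.hpp>   // single-header nlohmann/json; the include path may differ per installation
#include <cstdio>
#include <string>

int main()
{
    // "a" + NUL + "b" + newline + U+00E9 (2 bytes in UTF-8) = 6 bytes in total
    const auto j = nlohmann::json::parse(R"("a\u0000b\n\u00e9")");
    const std::string s = j.get<std::string>();
    std::printf("decoded length: %zu\n", s.size());  // 6
}
```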
unget current character (read it again on next get)
We implement unget by setting the variable next_unget to true. The input itself is not changed; ungetting is simulated by adjusting chars_read_total, chars_read_current_line, and token_string. The next call to get() will then behave as if the ungotten character were read again.
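A toy sketch of the flag-based unget mechanism; toy_lexer is purely illustrative and omits the position bookkeeping mentioned above:

```cpp
#include <cstdio>
#include <cstddef>

struct toy_lexer
{
    const char* input;
    std::size_t pos = 0;
    int current = -1;          // EOF marker, in the spirit of std::char_traits<char>::eof()
    bool next_unget = false;

    int get()
    {
        if (next_unget)
        {
            next_unget = false;   // replay the buffered character instead of reading input
            return current;
        }
        current = input[pos] ? static_cast<unsigned char>(input[pos++]) : -1;
        return current;
    }

    void unget() { next_unget = true; }  // the input itself is not modified
};

int main()
{
    toy_lexer lx{"42"};
    std::printf("%c ", lx.get());   // '4'
    lx.unget();
    std::printf("%c ", lx.get());   // '4' again
    std::printf("%c\n", lx.get());  // '2'
}
```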