mirror of
https://gitcode.com/gh_mirrors/ope/OpenFace.git
synced 2026-05-15 11:47:50 +00:00
Master commit of OpenFace.
This commit is contained in:
675
lib/3rdParty/dlib/include/dlib/cpp_tokenizer/cpp_tokenizer_kernel_1.h
vendored
Normal file
675
lib/3rdParty/dlib/include/dlib/cpp_tokenizer/cpp_tokenizer_kernel_1.h
vendored
Normal file
@@ -0,0 +1,675 @@
|
||||
// Copyright (C) 2005 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_CPP_TOKENIZER_KERNEl_1_
|
||||
#define DLIB_CPP_TOKENIZER_KERNEl_1_
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "cpp_tokenizer_kernel_abstract.h"
|
||||
#include "../algs.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
namespace cpp_tok_kernel_1_helper
{
    /*!
        A simple pair holding a token's text and its type code.  Used by
        cpp_tokenizer_kernel_1 to buffer tokens that were gathered early
        while looking ahead in the input stream.
    !*/
    struct token_text_pair
    {
        std::string token;  // the literal text of the token
        int type;           // one of the cpp_tokenizer_kernel_1 token type enum values
    };

}
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
class cpp_tokenizer_kernel_1
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON tok
|
||||
tok must be an implementation of tokenizer/tokenizer_kernel_abstract.h
|
||||
|
||||
REQUIREMENTS ON queue
|
||||
queue must be an implementation of queue/queue_kernel_abstract.h
|
||||
and must have T==cpp_tok_kernel_1_helper::token_text_pair
|
||||
|
||||
REQUIREMENTS ON set
|
||||
set must be an implemention of set/set_kernel_abstract.h or
|
||||
hash_set/hash_set_kernel_abstract.h and must have T==std::string.
|
||||
|
||||
INITIAL VALUE
|
||||
- keywords == a set of all the C++ keywords
|
||||
- tokenizer.stream_is_set() == false
|
||||
- buffer.size() == 0
|
||||
- tokenizer.get_identifier_head() == "$_" + tokenizer.lowercase_letters() +
|
||||
tokenizer.uppercase_letters()
|
||||
- tokenizer.get_identifier_body() == "$_" + tokenizer.lowercase_letters() +
|
||||
tokenizer.uppercase_letters() + tokenizer.numbers()
|
||||
- have_peeked == false
|
||||
|
||||
|
||||
CONVENTION
|
||||
- tokenizer.stream_is_set() == stream_is_set()
|
||||
- tokenizer.get_stream() == get_stream()
|
||||
- keywords == a set of all the C++ keywords
|
||||
|
||||
- tokenizer.get_identifier_head() == "$_" + tokenizer.lowercase_letters() +
|
||||
tokenizer.uppercase_letters()
|
||||
- tokenizer.get_identifier_body() == "$_" + tokenizer.lowercase_letters() +
|
||||
tokenizer.uppercase_letters() + tokenizer.numbers()
|
||||
|
||||
- buffer == a queue of tokens. This is where we put tokens
|
||||
we gathered early due to looking ahead.
|
||||
|
||||
|
||||
- if (have_peeked) then
|
||||
- next_token == the next token to be returned from get_token()
|
||||
- next_type == the type of token in peek_token
|
||||
!*/
|
||||
|
||||
typedef cpp_tok_kernel_1_helper::token_text_pair token_text_pair;
|
||||
|
||||
public:
|
||||
|
||||
enum
|
||||
{
|
||||
END_OF_FILE,
|
||||
KEYWORD,
|
||||
COMMENT,
|
||||
SINGLE_QUOTED_TEXT,
|
||||
DOUBLE_QUOTED_TEXT,
|
||||
IDENTIFIER,
|
||||
OTHER,
|
||||
NUMBER,
|
||||
WHITE_SPACE
|
||||
};
|
||||
|
||||
cpp_tokenizer_kernel_1 (
|
||||
);
|
||||
|
||||
virtual ~cpp_tokenizer_kernel_1 (
|
||||
);
|
||||
|
||||
void clear(
|
||||
);
|
||||
|
||||
void set_stream (
|
||||
std::istream& in
|
||||
);
|
||||
|
||||
bool stream_is_set (
|
||||
) const;
|
||||
|
||||
std::istream& get_stream (
|
||||
) const;
|
||||
|
||||
void get_token (
|
||||
int& type,
|
||||
std::string& token
|
||||
);
|
||||
|
||||
int peek_type (
|
||||
) const;
|
||||
|
||||
const std::string& peek_token (
|
||||
) const;
|
||||
|
||||
void swap (
|
||||
cpp_tokenizer_kernel_1<tok,queue,set>& item
|
||||
);
|
||||
|
||||
private:
|
||||
|
||||
void buffer_token(
|
||||
int type,
|
||||
const std::string& token
|
||||
)
|
||||
/*!
|
||||
ensures
|
||||
- stores the token and its type into buffer
|
||||
!*/
|
||||
{
|
||||
token_text_pair temp;
|
||||
temp.token = token;
|
||||
temp.type = type;
|
||||
buffer.enqueue(temp);
|
||||
}
|
||||
|
||||
void buffer_token(
|
||||
int type,
|
||||
char token
|
||||
)
|
||||
/*!
|
||||
ensures
|
||||
- stores the token and its type into buffer
|
||||
!*/
|
||||
{
|
||||
token_text_pair temp;
|
||||
temp.token = token;
|
||||
temp.type = type;
|
||||
buffer.enqueue(temp);
|
||||
}
|
||||
|
||||
// restricted functions
|
||||
cpp_tokenizer_kernel_1(const cpp_tokenizer_kernel_1<tok,queue,set>&); // copy constructor
|
||||
cpp_tokenizer_kernel_1<tok,queue,set>& operator=(const cpp_tokenizer_kernel_1<tok,queue,set>&); // assignment operator
|
||||
|
||||
// data members
|
||||
set keywords;
|
||||
queue buffer;
|
||||
tok tokenizer;
|
||||
|
||||
mutable std::string next_token;
|
||||
mutable int next_type;
|
||||
mutable bool have_peeked;
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
inline void swap (
|
||||
cpp_tokenizer_kernel_1<tok,queue,set>& a,
|
||||
cpp_tokenizer_kernel_1<tok,queue,set>& b
|
||||
) { a.swap(b); }
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// member function definitions
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
cpp_tokenizer_kernel_1(
|
||||
) :
|
||||
have_peeked(false)
|
||||
{
|
||||
// add C++ keywords to keywords
|
||||
std::string temp;
|
||||
temp = "#include"; keywords.add(temp);
|
||||
temp = "__asm"; keywords.add(temp);
|
||||
temp = "_asm"; keywords.add(temp);
|
||||
temp = "if"; keywords.add(temp);
|
||||
temp = "int"; keywords.add(temp);
|
||||
temp = "else"; keywords.add(temp);
|
||||
temp = "template"; keywords.add(temp);
|
||||
temp = "void"; keywords.add(temp);
|
||||
temp = "false"; keywords.add(temp);
|
||||
temp = "class"; keywords.add(temp);
|
||||
temp = "public"; keywords.add(temp);
|
||||
temp = "while"; keywords.add(temp);
|
||||
temp = "bool"; keywords.add(temp);
|
||||
temp = "new"; keywords.add(temp);
|
||||
temp = "delete"; keywords.add(temp);
|
||||
temp = "true"; keywords.add(temp);
|
||||
temp = "typedef"; keywords.add(temp);
|
||||
temp = "const"; keywords.add(temp);
|
||||
temp = "virtual"; keywords.add(temp);
|
||||
temp = "inline"; keywords.add(temp);
|
||||
temp = "for"; keywords.add(temp);
|
||||
temp = "break"; keywords.add(temp);
|
||||
temp = "struct"; keywords.add(temp);
|
||||
temp = "float"; keywords.add(temp);
|
||||
temp = "case"; keywords.add(temp);
|
||||
temp = "enum"; keywords.add(temp);
|
||||
temp = "this"; keywords.add(temp);
|
||||
temp = "typeid"; keywords.add(temp);
|
||||
temp = "double"; keywords.add(temp);
|
||||
temp = "char"; keywords.add(temp);
|
||||
temp = "typename"; keywords.add(temp);
|
||||
temp = "signed"; keywords.add(temp);
|
||||
temp = "friend"; keywords.add(temp);
|
||||
temp = "wint_t"; keywords.add(temp);
|
||||
temp = "default"; keywords.add(temp);
|
||||
temp = "asm"; keywords.add(temp);
|
||||
temp = "reinterpret_cast"; keywords.add(temp);
|
||||
temp = "#define"; keywords.add(temp);
|
||||
temp = "do"; keywords.add(temp);
|
||||
temp = "continue"; keywords.add(temp);
|
||||
temp = "auto"; keywords.add(temp);
|
||||
temp = "unsigned"; keywords.add(temp);
|
||||
temp = "size_t"; keywords.add(temp);
|
||||
temp = "#undef"; keywords.add(temp);
|
||||
temp = "#pragma"; keywords.add(temp);
|
||||
temp = "namespace"; keywords.add(temp);
|
||||
temp = "private"; keywords.add(temp);
|
||||
temp = "#endif"; keywords.add(temp);
|
||||
temp = "catch"; keywords.add(temp);
|
||||
temp = "#else"; keywords.add(temp);
|
||||
temp = "register"; keywords.add(temp);
|
||||
temp = "volatile"; keywords.add(temp);
|
||||
temp = "const_cast"; keywords.add(temp);
|
||||
temp = "#end"; keywords.add(temp);
|
||||
temp = "mutable"; keywords.add(temp);
|
||||
temp = "static_cast"; keywords.add(temp);
|
||||
temp = "wchar_t"; keywords.add(temp);
|
||||
temp = "#if"; keywords.add(temp);
|
||||
temp = "protected"; keywords.add(temp);
|
||||
temp = "throw"; keywords.add(temp);
|
||||
temp = "using"; keywords.add(temp);
|
||||
temp = "dynamic_cast"; keywords.add(temp);
|
||||
temp = "#ifdef"; keywords.add(temp);
|
||||
temp = "return"; keywords.add(temp);
|
||||
temp = "short"; keywords.add(temp);
|
||||
temp = "#error"; keywords.add(temp);
|
||||
temp = "#line"; keywords.add(temp);
|
||||
temp = "explicit"; keywords.add(temp);
|
||||
temp = "union"; keywords.add(temp);
|
||||
temp = "#ifndef"; keywords.add(temp);
|
||||
temp = "try"; keywords.add(temp);
|
||||
temp = "sizeof"; keywords.add(temp);
|
||||
temp = "goto"; keywords.add(temp);
|
||||
temp = "long"; keywords.add(temp);
|
||||
temp = "#elif"; keywords.add(temp);
|
||||
temp = "static"; keywords.add(temp);
|
||||
temp = "operator"; keywords.add(temp);
|
||||
temp = "switch"; keywords.add(temp);
|
||||
temp = "extern"; keywords.add(temp);
|
||||
|
||||
|
||||
// set the tokenizer's IDENTIFIER token for C++ identifiers
|
||||
tokenizer.set_identifier_token(
|
||||
"$_" + tokenizer.lowercase_letters() + tokenizer.uppercase_letters(),
|
||||
"$_" + tokenizer.lowercase_letters() + tokenizer.uppercase_letters() +
|
||||
tokenizer.numbers()
|
||||
);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
~cpp_tokenizer_kernel_1 (
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
void cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
clear(
|
||||
)
|
||||
{
|
||||
tokenizer.clear();
|
||||
buffer.clear();
|
||||
have_peeked = false;
|
||||
|
||||
// set the tokenizer's IDENTIFIER token for C++ identifiers
|
||||
tokenizer.set_identifier_token(
|
||||
"$_" + tokenizer.lowercase_letters() + tokenizer.uppercase_letters(),
|
||||
"$_" + tokenizer.lowercase_letters() + tokenizer.uppercase_letters() +
|
||||
tokenizer.numbers()
|
||||
);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
void cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
set_stream (
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
tokenizer.set_stream(in);
|
||||
buffer.clear();
|
||||
have_peeked = false;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
bool cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
stream_is_set (
|
||||
) const
|
||||
{
|
||||
return tokenizer.stream_is_set();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
std::istream& cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
get_stream (
|
||||
) const
|
||||
{
|
||||
return tokenizer.get_stream();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
void cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
get_token (
|
||||
int& type,
|
||||
std::string& token
|
||||
)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
if (!have_peeked)
|
||||
{
|
||||
|
||||
if (buffer.size() > 0)
|
||||
{
|
||||
// just return what is in the buffer
|
||||
token_text_pair temp;
|
||||
buffer.dequeue(temp);
|
||||
type = temp.type;
|
||||
token = temp.token;
|
||||
return;
|
||||
}
|
||||
|
||||
tokenizer.get_token(type,token);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case tok::END_OF_FILE:
|
||||
{
|
||||
type = END_OF_FILE;
|
||||
} break;
|
||||
|
||||
case tok::END_OF_LINE:
|
||||
case tok::WHITE_SPACE:
|
||||
{
|
||||
type = tokenizer.peek_type();
|
||||
if (type == tok::END_OF_LINE || type == tok::WHITE_SPACE)
|
||||
{
|
||||
std::string temp;
|
||||
do
|
||||
{
|
||||
tokenizer.get_token(type,temp);
|
||||
token += temp;
|
||||
type = tokenizer.peek_type();
|
||||
}while (type == tok::END_OF_LINE || type == tok::WHITE_SPACE);
|
||||
}
|
||||
type = WHITE_SPACE;
|
||||
|
||||
} break;
|
||||
|
||||
case tok::NUMBER:
|
||||
{
|
||||
// this could be a hex number such as 0xa33. we should check for this.
|
||||
if (tokenizer.peek_type() == tok::IDENTIFIER && token == "0" &&
|
||||
(tokenizer.peek_token()[0] == 'x' || tokenizer.peek_token()[0] == 'X'))
|
||||
{
|
||||
// this is a hex number so accumulate all the numbers and identifiers that follow
|
||||
// because they have to be part of the number
|
||||
std::string temp;
|
||||
tokenizer.get_token(type,temp);
|
||||
token = "0" + temp;
|
||||
|
||||
// get the rest of the hex number
|
||||
while (tokenizer.peek_type() == tok::IDENTIFIER ||
|
||||
tokenizer.peek_type() == tok::NUMBER
|
||||
)
|
||||
{
|
||||
tokenizer.get_token(type,temp);
|
||||
token += temp;
|
||||
}
|
||||
|
||||
}
|
||||
// or this could be a floating point value or something with an 'e' or 'E' in it.
|
||||
else if ((tokenizer.peek_type() == tok::CHAR && tokenizer.peek_token()[0] == '.') ||
|
||||
(tokenizer.peek_type() == tok::IDENTIFIER && std::tolower(tokenizer.peek_token()[0]) == 'e'))
|
||||
{
|
||||
std::string temp;
|
||||
tokenizer.get_token(type,temp);
|
||||
token += temp;
|
||||
// now get the rest of the floating point value
|
||||
while (tokenizer.peek_type() == tok::IDENTIFIER ||
|
||||
tokenizer.peek_type() == tok::NUMBER
|
||||
)
|
||||
{
|
||||
tokenizer.get_token(type,temp);
|
||||
token += temp;
|
||||
}
|
||||
}
|
||||
type = NUMBER;
|
||||
|
||||
} break;
|
||||
|
||||
case tok::IDENTIFIER:
|
||||
{
|
||||
if (keywords.is_member(token))
|
||||
{
|
||||
type = KEYWORD;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = IDENTIFIER;
|
||||
}
|
||||
} break;
|
||||
|
||||
case tok::CHAR:
|
||||
type = OTHER;
|
||||
switch (token[0])
|
||||
{
|
||||
case '#':
|
||||
{
|
||||
// this might be a preprocessor keyword so we should check the
|
||||
// next token
|
||||
if (tokenizer.peek_type() == tok::IDENTIFIER &&
|
||||
keywords.is_member('#'+tokenizer.peek_token()))
|
||||
{
|
||||
tokenizer.get_token(type,token);
|
||||
token = '#' + token;
|
||||
type = KEYWORD;
|
||||
}
|
||||
else
|
||||
{
|
||||
token = '#';
|
||||
type = OTHER;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case '"':
|
||||
{
|
||||
string temp;
|
||||
tokenizer.get_token(type,token);
|
||||
while (type != tok::END_OF_FILE)
|
||||
{
|
||||
// if this is the end of the quoted string
|
||||
if (type == tok::CHAR && token[0] == '"' &&
|
||||
(temp.size() == 0 || temp[temp.size()-1] != '\\' ||
|
||||
(temp.size() > 1 && temp[temp.size()-2] == '\\') ))
|
||||
{
|
||||
buffer_token(DOUBLE_QUOTED_TEXT,temp);
|
||||
buffer_token(OTHER,"\"");
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
temp += token;
|
||||
}
|
||||
tokenizer.get_token(type,token);
|
||||
}
|
||||
|
||||
|
||||
type = OTHER;
|
||||
token = '"';
|
||||
} break;
|
||||
|
||||
case '\'':
|
||||
{
|
||||
string temp;
|
||||
tokenizer.get_token(type,token);
|
||||
if (type == tok::CHAR && token[0] == '\\')
|
||||
{
|
||||
temp += '\\';
|
||||
tokenizer.get_token(type,token);
|
||||
}
|
||||
temp += token;
|
||||
buffer_token(SINGLE_QUOTED_TEXT,temp);
|
||||
|
||||
// The next character should be a ' so take it out and put it in
|
||||
// the buffer.
|
||||
tokenizer.get_token(type,token);
|
||||
buffer_token(OTHER,token);
|
||||
|
||||
type = OTHER;
|
||||
token = '\'';
|
||||
} break;
|
||||
|
||||
case '/':
|
||||
{
|
||||
// look ahead to see if this is the start of a comment
|
||||
if (tokenizer.peek_type() == tok::CHAR)
|
||||
{
|
||||
if (tokenizer.peek_token()[0] == '/')
|
||||
{
|
||||
tokenizer.get_token(type,token);
|
||||
// this is the start of a line comment
|
||||
token = "//";
|
||||
string temp;
|
||||
tokenizer.get_token(type,temp);
|
||||
while (type != tok::END_OF_FILE)
|
||||
{
|
||||
// if this is the end of the comment
|
||||
if (type == tok::END_OF_LINE &&
|
||||
token[token.size()-1] != '\\' )
|
||||
{
|
||||
token += '\n';
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
token += temp;
|
||||
}
|
||||
tokenizer.get_token(type,temp);
|
||||
}
|
||||
type = COMMENT;
|
||||
|
||||
}
|
||||
else if (tokenizer.peek_token()[0] == '*')
|
||||
{
|
||||
tokenizer.get_token(type,token);
|
||||
// this is the start of a block comment
|
||||
token = "/*";
|
||||
string temp;
|
||||
tokenizer.get_token(type,temp);
|
||||
while (type != tok::END_OF_FILE)
|
||||
{
|
||||
// if this is the end of the comment
|
||||
if (type == tok::CHAR && temp[0] == '/' &&
|
||||
token[token.size()-1] == '*')
|
||||
{
|
||||
token += '/';
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
token += temp;
|
||||
}
|
||||
tokenizer.get_token(type,temp);
|
||||
}
|
||||
type = COMMENT;
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
||||
default:
|
||||
break;
|
||||
} // switch (token[0])
|
||||
} // switch (type)
|
||||
}
|
||||
else
|
||||
{
|
||||
// if we get this far it means we have peeked so we should
|
||||
// return the peek data.
|
||||
type = next_type;
|
||||
token = next_token;
|
||||
have_peeked = false;
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
int cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
peek_type (
|
||||
) const
|
||||
{
|
||||
const_cast<cpp_tokenizer_kernel_1<tok,queue,set>*>(this)->get_token(next_type,next_token);
|
||||
have_peeked = true;
|
||||
return next_type;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
const std::string& cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
peek_token (
|
||||
) const
|
||||
{
|
||||
const_cast<cpp_tokenizer_kernel_1<tok,queue,set>*>(this)->get_token(next_type,next_token);
|
||||
have_peeked = true;
|
||||
return next_token;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tok,
|
||||
typename queue,
|
||||
typename set
|
||||
>
|
||||
void cpp_tokenizer_kernel_1<tok,queue,set>::
|
||||
swap (
|
||||
cpp_tokenizer_kernel_1& item
|
||||
)
|
||||
{
|
||||
tokenizer.swap(item.tokenizer);
|
||||
buffer.swap(item.buffer);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_CPP_TOKENIZER_KERNEl_1_
|
||||
|
||||
224
lib/3rdParty/dlib/include/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h
vendored
Normal file
224
lib/3rdParty/dlib/include/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h
vendored
Normal file
@@ -0,0 +1,224 @@
|
||||
// Copyright (C) 2005  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_CPP_TOKENIZER_KERNEl_ABSTRACT_
#ifdef DLIB_CPP_TOKENIZER_KERNEl_ABSTRACT_

#include <string>
#include <iosfwd>

namespace dlib
{

    class cpp_tokenizer
    {
        /*!
            INITIAL VALUE
                stream_is_set() == false

            WHAT THIS OBJECT REPRESENTS
                This object represents a simple tokenizer for C++ source code.

            BUFFERING
                This object is allowed to buffer data from the input stream.
                Thus if you clear it or switch streams (via calling set_stream())
                any buffered data will be lost.

            TOKENS
                When picking out tokens the cpp_tokenizer will always extract the
                longest token it can.  For example, if faced with the string
                "AAA" it will consider the three As to be a single IDENTIFIER
                token not three smaller IDENTIFIER tokens.

                Also note that no characters in the input stream are discarded.
                They will all be returned in the text of some token.
                Additionally, each character will never be returned more than once.
                This means that if you concatenated all returned tokens it would exactly
                reproduce the contents of the input stream.

                The tokens are defined as follows:

                END_OF_FILE
                    This token represents the end of file.  It doesn't have any
                    actual characters associated with it.

                KEYWORD
                    This token matches a C++ keyword.  (This includes the preprocessor
                    directives).

                COMMENT
                    This token matches a C++ comment.

                SINGLE_QUOTED_TEXT
                    This token matches the text of any single quoted literal.
                    For example, 'a' would be a match and the text of this token
                    would be the single character a.

                DOUBLE_QUOTED_TEXT
                    This token matches the text of any double quoted string.
                    For example, "C++" would be a match and the text of this token
                    would be the three character string C++.

                WHITE_SPACE
                    This is a multi character token.  It is defined as a sequence of
                    one or more spaces, carriage returns, newlines, and tabs.  I.e. It
                    is composed of characters from the following string " \r\n\t".

                IDENTIFIER
                    This token matches any C++ identifier that isn't matched by any
                    of the above tokens.  (A C++ identifier being a string matching
                    the regular expression [_$a-zA-Z][_$a-zA-Z0-9]*).

                NUMBER
                    This token matches any C++ numerical constant.

                OTHER
                    This matches anything that isn't part of one of the above tokens.
                    It is always a single character.
        !*/

    public:

        enum
        {
            END_OF_FILE,
            KEYWORD,
            COMMENT,
            SINGLE_QUOTED_TEXT,
            DOUBLE_QUOTED_TEXT,
            IDENTIFIER,
            OTHER,
            NUMBER,
            WHITE_SPACE
        };

        cpp_tokenizer (
        );
        /*!
            ensures
                - #*this is properly initialized
            throws
                - std::bad_alloc
        !*/

        virtual ~cpp_tokenizer (
        );
        /*!
            ensures
                - any resources associated with *this have been released
        !*/

        void clear(
        );
        /*!
            ensures
                - #*this has its initial value
            throws
                - std::bad_alloc
                    If this exception is thrown then #*this is unusable
                    until clear() is called and succeeds.
        !*/

        void set_stream (
            std::istream& in
        );
        /*!
            ensures
                - #*this will read data from in and tokenize it
                - #stream_is_set() == true
                - #get_stream() == in
        !*/

        bool stream_is_set (
        ) const;
        /*!
            ensures
                - returns true if a stream has been associated with *this by calling
                  set_stream()
        !*/

        std::istream& get_stream (
        ) const;
        /*!
            requires
                - stream_is_set() == true
            ensures
                - returns a reference to the istream object that *this is reading
                  from.
        !*/

        void get_token (
            int& type,
            std::string& token
        );
        /*!
            requires
                - stream_is_set() == true
            ensures
                - #token == the next token from the input stream get_stream()
                - #type == the type of the token in #token
            throws
                - bad_alloc
                    If this exception is thrown then the call to this function will
                    have no effect on *this but the values of #type and #token will be
                    undefined.  Additionally, some characters may have been read
                    from the stream get_stream() and lost.
        !*/

        int peek_type (
        ) const;
        /*!
            requires
                - stream_is_set() == true
            ensures
                - returns the type of the token that will be returned from
                  the next call to get_token()
            throws
                - bad_alloc
                    If this exception is thrown then the call to this function will
                    have no effect on *this.  However, some characters may have been
                    read from the stream get_stream() and lost.
        !*/

        const std::string& peek_token (
        ) const;
        /*!
            requires
                - stream_is_set() == true
            ensures
                - returns the text of the token that will be returned from
                  the next call to get_token()
            throws
                - bad_alloc
                    If this exception is thrown then the call to this function will
                    have no effect on *this.  However, some characters may have been
                    read from the stream get_stream() and lost.
        !*/

        void swap (
            cpp_tokenizer& item
        );
        /*!
            ensures
                - swaps *this and item
        !*/

    private:

        // restricted functions
        cpp_tokenizer(const cpp_tokenizer&);        // copy constructor
        cpp_tokenizer& operator=(const cpp_tokenizer&);    // assignment operator

    };

    inline void swap (
        cpp_tokenizer& a,
        cpp_tokenizer& b
    ) { a.swap(b); }
    /*!
        provides a global swap function
    !*/

}

#endif // DLIB_CPP_TOKENIZER_KERNEl_ABSTRACT_
|
||||
|
||||
137
lib/3rdParty/dlib/include/dlib/cpp_tokenizer/cpp_tokenizer_kernel_c.h
vendored
Normal file
137
lib/3rdParty/dlib/include/dlib/cpp_tokenizer/cpp_tokenizer_kernel_c.h
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright (C) 2003 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_CPP_TOKENIZER_KERNEl_C_
|
||||
#define DLIB_CPP_TOKENIZER_KERNEl_C_
|
||||
|
||||
#include "cpp_tokenizer_kernel_abstract.h"
|
||||
#include "../assert.h"
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
template <
    typename tokenizer
    >
class cpp_tokenizer_kernel_c : public tokenizer
{
    /*!
        A checking wrapper around a cpp_tokenizer implementation.  It forwards
        every call to the base tokenizer after verifying (via DLIB_CASSERT)
        that the function's requires clause holds.
    !*/

public:
    std::istream& get_stream (
    ) const;

    void get_token (
        int& type,
        std::string& token
    );

    int peek_type (
    ) const;

    const std::string& peek_token (
    ) const;

};
|
||||
|
||||
template <
|
||||
typename tokenizer
|
||||
>
|
||||
inline void swap (
|
||||
cpp_tokenizer_kernel_c<tokenizer>& a,
|
||||
cpp_tokenizer_kernel_c<tokenizer>& b
|
||||
) { a.swap(b); }
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// member function definitions
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tokenizer
|
||||
>
|
||||
std::istream& cpp_tokenizer_kernel_c<tokenizer>::
|
||||
get_stream (
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_CASSERT( this->stream_is_set() == true,
|
||||
"\tstd::istream& cpp_tokenizer::get_stream()"
|
||||
<< "\n\tyou must set a stream for this object before you can get it"
|
||||
<< "\n\tthis: " << this
|
||||
);
|
||||
|
||||
// call the real function
|
||||
return tokenizer::get_stream();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tokenizer
|
||||
>
|
||||
const std::string& cpp_tokenizer_kernel_c<tokenizer>::
|
||||
peek_token (
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_CASSERT( this->stream_is_set() == true,
|
||||
"\tconst std::string& cpp_tokenizer::peek_token()"
|
||||
<< "\n\tyou must set a stream for this object before you can peek at what it contains"
|
||||
<< "\n\tthis: " << this
|
||||
);
|
||||
|
||||
// call the real function
|
||||
return tokenizer::peek_token();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tokenizer
|
||||
>
|
||||
int cpp_tokenizer_kernel_c<tokenizer>::
|
||||
peek_type (
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_CASSERT( this->stream_is_set() == true,
|
||||
"\tint cpp_tokenizer::peek_type()"
|
||||
<< "\n\tyou must set a stream for this object before you can peek at what it contains"
|
||||
<< "\n\tthis: " << this
|
||||
);
|
||||
|
||||
// call the real function
|
||||
return tokenizer::peek_type();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename tokenizer
|
||||
>
|
||||
void cpp_tokenizer_kernel_c<tokenizer>::
|
||||
get_token (
|
||||
int& type,
|
||||
std::string& token
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_CASSERT( this->stream_is_set() == true,
|
||||
"\tvoid cpp_tokenizer::get_token()"
|
||||
<< "\n\tyou must set a stream for this object before you can get tokens from it."
|
||||
<< "\n\tthis: " << this
|
||||
);
|
||||
|
||||
// call the real function
|
||||
tokenizer::get_token(type,token);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_CPP_TOKENIZER_KERNEl_C_
|
||||
|
||||
|
||||
Reference in New Issue
Block a user