/*
Copyright (C) 1993-2015 John W. Eaton
This file is part of Octave.
Octave is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
Octave is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with Octave; see the file COPYING. If not, see
.
*/
#if !defined (octave_lex_h)
#define octave_lex_h 1
#include
#include
#include
#include
#include
#include "comment-list.h"
#include "input.h"
#include "token.h"
// Is the given string a keyword?
extern bool is_keyword (const std::string& s);
// For communication between the lexer and parser.
class
lexical_feedback
{
public:
// Track symbol table information when parsing functions.
class symbol_table_context
{
public:
symbol_table_context (void) : frame_stack () { }
void clear (void)
{
while (! frame_stack.empty ())
frame_stack.pop ();
}
bool empty (void) const { return frame_stack.empty (); }
void pop (void)
{
if (empty ())
panic_impossible ();
frame_stack.pop ();
}
void push (symbol_table::scope_id scope = symbol_table::current_scope ())
{
frame_stack.push (scope);
}
symbol_table::scope_id curr_scope (void) const
{
return empty () ? symbol_table::current_scope () : frame_stack.top ();
}
private:
std::stack frame_stack;
};
// Track nesting of square brackets, curly braces, and parentheses.
class bbp_nesting_level
{
private:
enum bracket_type
{
BRACKET = 1,
BRACE = 2,
PAREN = 3,
ANON_FCN_BODY = 4
};
public:
bbp_nesting_level (void) : context () { }
bbp_nesting_level (const bbp_nesting_level& nl) : context (nl.context) { }
bbp_nesting_level& operator = (const bbp_nesting_level& nl)
{
if (&nl != this)
context = nl.context;
return *this;
}
~bbp_nesting_level (void) { }
void reset (void)
{
while (! context.empty ())
context.pop ();
}
void bracket (void) { context.push (BRACKET); }
bool is_bracket (void)
{
return ! context.empty () && context.top () == BRACKET;
}
void brace (void) { context.push (BRACE); }
bool is_brace (void)
{
return ! context.empty () && context.top () == BRACE;
}
void paren (void) { context.push (PAREN); }
bool is_paren (void)
{
return ! context.empty () && context.top () == PAREN;
}
void anon_fcn_body (void) { context.push (ANON_FCN_BODY); }
bool is_anon_fcn_body (void)
{
return ! context.empty () && context.top () == ANON_FCN_BODY;
}
bool is_bracket_or_brace (void)
{
return (! context.empty ()
&& (context.top () == BRACKET || context.top () == BRACE));
}
bool none (void) { return context.empty (); }
void remove (void)
{
if (! context.empty ())
context.pop ();
}
void clear (void)
{
while (! context.empty ())
context.pop ();
}
private:
std::stack context;
};
class token_cache
{
public:
// Store an "unlimited" number of tokens.
token_cache (size_t sz_arg = std::numeric_limits::max ())
: buffer (), sz (sz_arg)
{ }
void push (token *tok)
{
if (buffer.size () == sz)
pop ();
buffer.push_front (tok);
}
void pop (void)
{
if (! empty ())
{
delete buffer.back ();
buffer.pop_back ();
}
}
// Direct access.
token *at (size_t n)
{
return empty () ? 0 : buffer.at (n);
}
const token *at (size_t n) const
{
return empty () ? 0 : buffer.at (n);
}
// Most recently pushed.
token *front (void)
{
return empty () ? 0 : buffer.front ();
}
const token *front (void) const
{
return empty () ? 0 : buffer.front ();
}
token *back (void)
{
return empty () ? 0 : buffer.back ();
}
const token *back (void) const
{
return empty () ? 0 : buffer.back ();
}
// Number of elements currently in the buffer, max of sz.
size_t size (void) const { return buffer.size (); }
bool empty (void) const { return buffer.empty (); }
void clear (void)
{
while (! empty ())
pop ();
}
private:
std::deque buffer;
size_t sz;
// No copying!
token_cache (const token_cache&);
token_cache& operator = (const token_cache&);
};
lexical_feedback (void)
: end_of_input (false), at_beginning_of_statement (true),
looking_at_anon_fcn_args (false), looking_at_return_list (false),
looking_at_parameter_list (false), looking_at_decl_list (false),
looking_at_initializer_expression (false),
looking_at_matrix_or_assign_lhs (false),
looking_for_object_index (false),
looking_at_indirect_ref (false), parsing_class_method (false),
parsing_classdef (false), maybe_classdef_get_set_method (false),
parsing_classdef_get_method (false),
parsing_classdef_set_method (false),
quote_is_transpose (false), force_script (false),
reading_fcn_file (false), reading_script_file (false),
reading_classdef_file (false),
input_line_number (1), current_input_column (1),
bracketflag (0), braceflag (0),
looping (0), defining_func (0), looking_at_function_handle (0),
block_comment_nesting_level (0), command_arg_paren_count (0),
token_count (0), current_input_line (), comment_text (),
help_text (), string_text (), string_line (0), string_column (0),
fcn_file_name (), fcn_file_full_name (), looking_at_object_index (),
parsed_function_name (), pending_local_variables (),
symtab_context (), nesting_level (), tokens ()
{
init ();
}
~lexical_feedback (void);
void init (void);
void reset (void);
int previous_token_value (void) const;
bool previous_token_value_is (int tok_val) const;
void mark_previous_token_trailing_space (void);
bool space_follows_previous_token (void) const;
bool previous_token_is_binop (void) const;
bool previous_token_is_keyword (void) const;
bool previous_token_may_be_command (void) const;
void maybe_mark_previous_token_as_variable (void);
void mark_as_variable (const std::string& nm);
void mark_as_variables (const std::list& lst);
// true means that we have encountered eof on the input stream.
bool end_of_input;
// true means we are at the beginning of a statement, where a
// command name is possible.
bool at_beginning_of_statement;
// true means we are parsing an anonymous function argument list.
bool looking_at_anon_fcn_args;
// true means we're parsing the return list for a function.
bool looking_at_return_list;
// true means we're parsing the parameter list for a function.
bool looking_at_parameter_list;
// true means we're parsing a declaration list (global or
// persistent).
bool looking_at_decl_list;
// true means we are looking at the initializer expression for a
// parameter list element.
bool looking_at_initializer_expression;
// true means we're parsing a matrix or the left hand side of
// multi-value assignment statement.
bool looking_at_matrix_or_assign_lhs;
// object index not possible until we've seen something.
bool looking_for_object_index;
// true means we're looking at an indirect reference to a
// structure element.
bool looking_at_indirect_ref;
// true means we are parsing a class method in function or classdef file.
bool parsing_class_method;
// true means we are parsing a classdef file
bool parsing_classdef;
// true means we are parsing a class method declaration line in a
// classdef file and can accept a property get or set method name.
// for example, "get.propertyname" is recognized as a function name.
bool maybe_classdef_get_set_method;
// TRUE means we are parsing a classdef get.method.
bool parsing_classdef_get_method;
// TRUE means we are parsing a classdef set.method.
bool parsing_classdef_set_method;
// return transpose or start a string?
bool quote_is_transpose;
// TRUE means treat the current file as a script even if the first
// token is "function" or "classdef".
bool force_script;
// TRUE means we're parsing a function file.
bool reading_fcn_file;
// TRUE means we're parsing a script file.
bool reading_script_file;
// TRUE means we're parsing a classdef file.
bool reading_classdef_file;
// the current input line number.
int input_line_number;
// the column of the current token.
int current_input_column;
// square bracket level count.
int bracketflag;
// curly brace level count.
int braceflag;
// true means we're in the middle of defining a loop.
int looping;
// nonzero means we're in the middle of defining a function.
int defining_func;
// nonzero means we are parsing a function handle.
int looking_at_function_handle;
// nestng level for blcok comments.
int block_comment_nesting_level;
// Parenthesis count for command argument parsing.
int command_arg_paren_count;
// Count of tokens recognized by this lexer since initialized or
// since the last reset.
size_t token_count;
// The current line of input.
std::string current_input_line;
// The current comment text.
std::string comment_text;
// The current help text.
std::string help_text;
// The current character string text.
std::string string_text;
// The position of the beginning of the current character string.
int string_line;
int string_column;
// Simple name of function file we are reading.
std::string fcn_file_name;
// Full name of file we are reading.
std::string fcn_file_full_name;
// if the front of the list is true, the closest paren, brace, or
// bracket nesting is an index for an object.
std::list looking_at_object_index;
// if the top of the stack is true, then we've already seen the name
// of the current function. should only matter if
// current_function_level > 0
std::stack parsed_function_name;
// set of identifiers that might be local variable names.
std::set pending_local_variables;
// Track current symbol table scope and context.
symbol_table_context symtab_context;
// is the closest nesting level a square bracket, squiggly brace,
// a paren, or an anonymous function body?
bbp_nesting_level nesting_level;
// Tokens generated by the lexer.
token_cache tokens;
private:
// No copying!
lexical_feedback (const lexical_feedback&);
lexical_feedback& operator = (const lexical_feedback&);
};
// octave_base_lexer inherits from lexical_feedback because we will
// eventually have several different constructors and it is easier to
// intialize if everything is grouped in a parent class rather than
// listing all the members in the octave_base_lexer class.
class
octave_base_lexer : public lexical_feedback
{
public:
// Handle buffering of input for lexer.
class input_buffer
{
public:
input_buffer (void)
: buffer (), pos (0), chars_left (0), eof (false)
{ }
void fill (const std::string& input, bool eof_arg);
// Copy at most max_size characters to buf.
int copy_chunk (char *buf, size_t max_size);
bool empty (void) const { return chars_left == 0; }
bool at_eof (void) const { return eof; }
private:
std::string buffer;
const char *pos;
size_t chars_left;
bool eof;
};
// Collect comment text.
class
comment_buffer
{
public:
comment_buffer (void) : comment_list (0) { }
~comment_buffer (void) { delete comment_list; }
void append (const std::string& s, octave_comment_elt::comment_type t)
{
if (! comment_list)
comment_list = new octave_comment_list ();
comment_list->append (s, t);
}
// Caller is expected to delete the returned value.
octave_comment_list *get_comment (void)
{
octave_comment_list *retval = comment_list;
comment_list = 0;
return retval;
}
void reset (void)
{
delete comment_list;
comment_list = 0;
}
private:
octave_comment_list *comment_list;
};
octave_base_lexer (void)
: lexical_feedback (), scanner (0), input_buf (), comment_buf ()
{
init ();
}
virtual ~octave_base_lexer (void);
void init (void);
virtual bool is_push_lexer (void) const { return false; }
virtual void reset (void);
void prep_for_file (void);
void begin_string (int state);
virtual int fill_flex_buffer (char *buf, unsigned int max_size) = 0;
bool at_end_of_buffer (void) const { return input_buf.empty (); }
bool at_end_of_file (void) const { return input_buf.at_eof (); }
int handle_end_of_input (void);
char *flex_yytext (void);
int flex_yyleng (void);
int text_yyinput (void);
void xunput (char c, char *buf);
void xunput (char c);
bool looking_at_space (void);
bool inside_any_object_index (void);
bool is_variable (const std::string& name);
int is_keyword_token (const std::string& s);
bool fq_identifier_contains_keyword (const std::string& s);
bool whitespace_is_significant (void);
void handle_number (void);
void handle_continuation (void);
void finish_comment (octave_comment_elt::comment_type typ);
octave_comment_list *get_comment (void) { return comment_buf.get_comment (); }
int handle_close_bracket (int bracket_type);
bool looks_like_command_arg (void);
int handle_superclass_identifier (void);
int handle_meta_identifier (void);
int handle_fq_identifier (void);
int handle_identifier (void);
void maybe_warn_separator_insert (char sep);
void gripe_single_quote_string (void);
void gripe_language_extension (const std::string& msg);
void maybe_gripe_language_extension_comment (char c);
void gripe_language_extension_continuation (void);
void gripe_language_extension_operator (const std::string& op);
void push_token (token *);
token *current_token (void);
void display_token (int tok);
void fatal_error (const char *msg);
void lexer_debug (const char *pattern);
// Internal state of the flex-generated lexer.
void *scanner;
// Object that reads and buffers input.
input_buffer input_buf;
// Object that collects comment text.
comment_buffer comment_buf;
virtual void increment_promptflag (void) = 0;
virtual void decrement_promptflag (void) = 0;
virtual int promptflag (void) const = 0;
virtual int promptflag (int) = 0;
virtual std::string input_source (void) const { return "unknown"; }
virtual bool input_from_terminal (void) const { return false; }
virtual bool input_from_file (void) const { return false; }
virtual bool input_from_eval_string (void) const { return false; }
void push_start_state (int state);
void pop_start_state (void);
void clear_start_state (void);
int start_state (void) const { return start_state_stack.top (); }
void display_start_state (void) const;
int handle_op (const char *pattern, int tok, bool bos = false);
int handle_language_extension_op (const char *pattern, int tok,
bool bos = false);
bool maybe_unput_comma_before_unary_op (int tok);
int handle_unary_op (int tok, bool bos = false);
int handle_language_extension_unary_op (int tok, bool bos = false);
int handle_assign_op (const char *pattern, int tok);
int handle_language_extension_assign_op (const char *pattern, int tok);
int handle_op_internal (int tok, bool bos, bool compat);
int handle_token (const std::string& name, int tok);
int handle_token (int tok, token *tok_val = 0);
int count_token (int tok);
int count_token_internal (int tok);
int show_token (int tok);
void enable_fq_identifier (void);
protected:
std::stack start_state_stack;
// No copying!
octave_base_lexer (const octave_base_lexer&);
octave_base_lexer& operator = (const octave_base_lexer&);
};
class
octave_lexer : public octave_base_lexer
{
public:
octave_lexer (void)
: octave_base_lexer (), input_reader (this)
{ }
octave_lexer (FILE *file)
: octave_base_lexer (), input_reader (file, this)
{ }
octave_lexer (const std::string& eval_string)
: octave_base_lexer (), input_reader (eval_string, this)
{ }
void reset (void)
{
input_reader.reset ();
octave_base_lexer::reset ();
}
void increment_promptflag (void) { input_reader.increment_promptflag (); }
void decrement_promptflag (void) { input_reader.decrement_promptflag (); }
int promptflag (void) const { return input_reader.promptflag (); }
int promptflag (int n) { return input_reader.promptflag (n); }
std::string input_source (void) const
{
return input_reader.input_source ();
}
bool input_from_terminal (void) const
{
return input_reader.input_from_terminal ();
}
bool input_from_file (void) const
{
return input_reader.input_from_file ();
}
bool input_from_eval_string (void) const
{
return input_reader.input_from_eval_string ();
}
int fill_flex_buffer (char *buf, unsigned int max_size);
octave_input_reader input_reader;
protected:
// No copying!
octave_lexer (const octave_lexer&);
octave_lexer& operator = (const octave_lexer&);
};
class
octave_push_lexer : public octave_base_lexer
{
public:
octave_push_lexer (const std::string& input = std::string (),
bool eof = false)
: octave_base_lexer (), pflag (1)
{
append_input (input, eof);
}
bool is_push_lexer (void) const { return true; }
void reset (void)
{
promptflag (1);
octave_base_lexer::reset ();
}
void append_input (const std::string& input, bool eof)
{
input_buf.fill (input, eof);
}
void increment_promptflag (void) { pflag++; }
void decrement_promptflag (void) { pflag--; }
int promptflag (void) const { return pflag; }
int promptflag (int n)
{
int retval = pflag;
pflag = n;
return retval;
}
std::string input_source (void) const { return "push buffer"; }
int fill_flex_buffer (char *buf, unsigned int max_size);
protected:
int pflag;
// No copying!
octave_push_lexer (const octave_push_lexer&);
octave_push_lexer& operator = (const octave_push_lexer&);
};
#endif