Prism Ruby parser
Loading...
Searching...
No Matches
Data Structures | Macros | Typedefs | Enumerations
parser.h File Reference

The parser used to parse Ruby source. More...

#include "prism/defines.h"
#include "prism/ast.h"
#include "prism/encoding.h"
#include "prism/options.h"
#include "prism/static_literals.h"
#include "prism/util/pm_constant_pool.h"
#include "prism/util/pm_list.h"
#include "prism/util/pm_newline_list.h"
#include "prism/util/pm_string.h"
#include <stdbool.h>
Include dependency graph for parser.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  pm_heredoc_lex_mode_t
 All of the information that needs to be stored in order to lex a heredoc. More...
 
struct  pm_lex_mode
 When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is currently lexing. More...
 
struct  pm_context_node
 This is a node in a linked list of contexts. More...
 
struct  pm_comment
 This is a node in the linked list of comments that we've found while parsing. More...
 
struct  pm_magic_comment_t
 This is a node in the linked list of magic comments that we've found while parsing. More...
 
struct  pm_lex_callback_t
 When you are lexing through a file, the lexer needs all of the information that the parser additionally provides (for example, the local table). More...
 
struct  pm_local_t
 This tracks an individual local variable in a certain lexical context, as well as the number of times it is read. More...
 
struct  pm_locals
 This is a set of local variables in a certain lexical context (method, class, module, etc.). More...
 
struct  pm_scope
 This struct represents a node in a linked list of scopes. More...
 
struct  pm_parser
 This struct represents the overall parser. More...
 

Macros

#define PM_LEX_STACK_SIZE   4
 We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times while parsing.
 

Typedefs

typedef struct pm_lex_mode pm_lex_mode_t
 When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is currently lexing.
 
typedef struct pm_parser pm_parser_t
 The parser used to parse Ruby source.
 
typedef struct pm_context_node pm_context_node_t
 This is a node in a linked list of contexts.
 
typedef struct pm_comment pm_comment_t
 This is a node in the linked list of comments that we've found while parsing.
 
typedef void(* pm_encoding_changed_callback_t) (pm_parser_t *parser)
 When the encoding that is being used to parse the source is changed by prism, we provide the ability here to call out to a user-defined function.
 
typedef uint8_t pm_shareable_constant_value_t
 The type of shareable constant value that can be set.
 
typedef struct pm_locals pm_locals_t
 This is a set of local variables in a certain lexical context (method, class, module, etc.).
 
typedef uint8_t pm_scope_parameters_t
 The flags about scope parameters that can be set.
 
typedef struct pm_scope pm_scope_t
 This struct represents a node in a linked list of scopes.
 
typedef uint32_t pm_state_stack_t
 An integer type (a typedef of uint32_t) that represents a stack of boolean values.
 

Enumerations

enum  pm_lex_state_bit_t {
  PM_LEX_STATE_BIT_BEG , PM_LEX_STATE_BIT_END , PM_LEX_STATE_BIT_ENDARG , PM_LEX_STATE_BIT_ENDFN ,
  PM_LEX_STATE_BIT_ARG , PM_LEX_STATE_BIT_CMDARG , PM_LEX_STATE_BIT_MID , PM_LEX_STATE_BIT_FNAME ,
  PM_LEX_STATE_BIT_DOT , PM_LEX_STATE_BIT_CLASS , PM_LEX_STATE_BIT_LABEL , PM_LEX_STATE_BIT_LABELED ,
  PM_LEX_STATE_BIT_FITEM
}
 This enum provides various bits that represent different kinds of states that the lexer can track. More...
 
enum  pm_lex_state_t {
  PM_LEX_STATE_NONE = 0 , PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG) , PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END) , PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG) ,
  PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN) , PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG) , PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG) , PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID) ,
  PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME) , PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT) , PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS) , PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL) ,
  PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED) , PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM) , PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS , PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG ,
  PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
}
 This enum combines the various bits from the above enum into individual values that represent the various states of the lexer.
 
enum  pm_heredoc_quote_t { PM_HEREDOC_QUOTE_NONE , PM_HEREDOC_QUOTE_SINGLE = '\'' , PM_HEREDOC_QUOTE_DOUBLE = '"' , PM_HEREDOC_QUOTE_BACKTICK = '`' }
 The type of quote that a heredoc uses.
 
enum  pm_heredoc_indent_t { PM_HEREDOC_INDENT_NONE , PM_HEREDOC_INDENT_DASH , PM_HEREDOC_INDENT_TILDE }
 The type of indentation that a heredoc uses.
 
enum  pm_context_t {
  PM_CONTEXT_NONE = 0 , PM_CONTEXT_BEGIN , PM_CONTEXT_BEGIN_ENSURE , PM_CONTEXT_BEGIN_ELSE ,
  PM_CONTEXT_BEGIN_RESCUE , PM_CONTEXT_BLOCK_BRACES , PM_CONTEXT_BLOCK_KEYWORDS , PM_CONTEXT_BLOCK_ENSURE ,
  PM_CONTEXT_BLOCK_ELSE , PM_CONTEXT_BLOCK_RESCUE , PM_CONTEXT_CASE_WHEN , PM_CONTEXT_CASE_IN ,
  PM_CONTEXT_CLASS , PM_CONTEXT_CLASS_ENSURE , PM_CONTEXT_CLASS_ELSE , PM_CONTEXT_CLASS_RESCUE ,
  PM_CONTEXT_DEF , PM_CONTEXT_DEF_ENSURE , PM_CONTEXT_DEF_ELSE , PM_CONTEXT_DEF_RESCUE ,
  PM_CONTEXT_DEF_PARAMS , PM_CONTEXT_DEFINED , PM_CONTEXT_DEFAULT_PARAMS , PM_CONTEXT_ELSE ,
  PM_CONTEXT_ELSIF , PM_CONTEXT_EMBEXPR , PM_CONTEXT_FOR , PM_CONTEXT_FOR_INDEX ,
  PM_CONTEXT_IF , PM_CONTEXT_LAMBDA_BRACES , PM_CONTEXT_LAMBDA_DO_END , PM_CONTEXT_LAMBDA_ENSURE ,
  PM_CONTEXT_LAMBDA_ELSE , PM_CONTEXT_LAMBDA_RESCUE , PM_CONTEXT_LOOP_PREDICATE , PM_CONTEXT_MAIN ,
  PM_CONTEXT_MODULE , PM_CONTEXT_MODULE_ENSURE , PM_CONTEXT_MODULE_ELSE , PM_CONTEXT_MODULE_RESCUE ,
  PM_CONTEXT_MULTI_TARGET , PM_CONTEXT_PARENS , PM_CONTEXT_POSTEXE , PM_CONTEXT_PREDICATE ,
  PM_CONTEXT_PREEXE , PM_CONTEXT_RESCUE_MODIFIER , PM_CONTEXT_SCLASS , PM_CONTEXT_SCLASS_ENSURE ,
  PM_CONTEXT_SCLASS_ELSE , PM_CONTEXT_SCLASS_RESCUE , PM_CONTEXT_TERNARY , PM_CONTEXT_UNLESS ,
  PM_CONTEXT_UNTIL , PM_CONTEXT_WHILE
}
 While parsing, we keep track of a stack of contexts. More...
 
enum  pm_comment_type_t { PM_COMMENT_INLINE , PM_COMMENT_EMBDOC }
 This is the type of a comment that we've found while parsing.
 

Detailed Description

The parser used to parse Ruby source.

Macro Definition Documentation

◆ PM_LEX_STACK_SIZE

#define PM_LEX_STACK_SIZE   4

We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times while parsing.

You really shouldn't need more than this because you only really nest deeply when doing string interpolation.

Typedef Documentation

◆ pm_lex_mode_t

typedef struct pm_lex_mode pm_lex_mode_t

When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is currently lexing.

For example, when we find the start of a string, the first token that we return is a TOKEN_STRING_BEGIN token. After that the lexer is now in the PM_LEX_STRING mode, and will return tokens that are found as part of a string.

◆ pm_locals_t

typedef struct pm_locals pm_locals_t

This is a set of local variables in a certain lexical context (method, class, module, etc.).

We need to track how many times these variables are read in order to warn if they only get written.

◆ pm_scope_t

typedef struct pm_scope pm_scope_t

This struct represents a node in a linked list of scopes.

Some scopes can see into their parent scopes, while others cannot.

Enumeration Type Documentation

◆ pm_lex_state_bit_t

This enum provides various bits that represent different kinds of states that the lexer can track.

This is used to determine which kind of token to return based on the context of the parser.

◆ pm_context_t

While parsing, we keep track of a stack of contexts.

This is helpful for error recovery so that we can pop back to a previous context when we hit a token that is understood by a parent context but not by the current context.

Enumerator
PM_CONTEXT_NONE 

a null context, used for returning a value from a function

PM_CONTEXT_BEGIN 

a begin statement

PM_CONTEXT_BEGIN_ENSURE 

an ensure statement with an explicit begin

PM_CONTEXT_BEGIN_ELSE 

a rescue else statement with an explicit begin

PM_CONTEXT_BEGIN_RESCUE 

a rescue statement with an explicit begin

PM_CONTEXT_BLOCK_BRACES 

expressions in block arguments using braces

PM_CONTEXT_BLOCK_KEYWORDS 

expressions in block arguments using do..end

PM_CONTEXT_BLOCK_ENSURE 

an ensure statement within a do..end block

PM_CONTEXT_BLOCK_ELSE 

a rescue else statement within a do..end block

PM_CONTEXT_BLOCK_RESCUE 

a rescue statement within a do..end block

PM_CONTEXT_CASE_WHEN 

a case when statement

PM_CONTEXT_CASE_IN 

a case in statement

PM_CONTEXT_CLASS 

a class declaration

PM_CONTEXT_CLASS_ENSURE 

an ensure statement within a class statement

PM_CONTEXT_CLASS_ELSE 

a rescue else statement within a class statement

PM_CONTEXT_CLASS_RESCUE 

a rescue statement within a class statement

PM_CONTEXT_DEF 

a method definition

PM_CONTEXT_DEF_ENSURE 

an ensure statement within a method definition

PM_CONTEXT_DEF_ELSE 

a rescue else statement within a method definition

PM_CONTEXT_DEF_RESCUE 

a rescue statement within a method definition

PM_CONTEXT_DEF_PARAMS 

a method definition's parameters

PM_CONTEXT_DEFINED 

a defined? expression

PM_CONTEXT_DEFAULT_PARAMS 

a method definition's default parameter

PM_CONTEXT_ELSE 

an else clause

PM_CONTEXT_ELSIF 

an elsif clause

PM_CONTEXT_EMBEXPR 

an interpolated expression

PM_CONTEXT_FOR 

a for loop

PM_CONTEXT_FOR_INDEX 

a for loop's index

PM_CONTEXT_IF 

an if statement

PM_CONTEXT_LAMBDA_BRACES 

a lambda expression with braces

PM_CONTEXT_LAMBDA_DO_END 

a lambda expression with do..end

PM_CONTEXT_LAMBDA_ENSURE 

an ensure statement within a lambda expression

PM_CONTEXT_LAMBDA_ELSE 

a rescue else statement within a lambda expression

PM_CONTEXT_LAMBDA_RESCUE 

a rescue statement within a lambda expression

PM_CONTEXT_LOOP_PREDICATE 

the predicate clause of a loop statement

PM_CONTEXT_MAIN 

the top level context

PM_CONTEXT_MODULE 

a module declaration

PM_CONTEXT_MODULE_ENSURE 

an ensure statement within a module statement

PM_CONTEXT_MODULE_ELSE 

a rescue else statement within a module statement

PM_CONTEXT_MODULE_RESCUE 

a rescue statement within a module statement

PM_CONTEXT_MULTI_TARGET 

a multiple target expression

PM_CONTEXT_PARENS 

a parenthesized expression

PM_CONTEXT_POSTEXE 

an END block

PM_CONTEXT_PREDICATE 

a predicate inside an if/elsif/unless statement

PM_CONTEXT_PREEXE 

a BEGIN block

PM_CONTEXT_RESCUE_MODIFIER 

a modifier rescue clause

PM_CONTEXT_SCLASS 

a singleton class definition

PM_CONTEXT_SCLASS_ENSURE 

an ensure statement within a singleton class

PM_CONTEXT_SCLASS_ELSE 

a rescue else statement within a singleton class

PM_CONTEXT_SCLASS_RESCUE 

a rescue statement within a singleton class

PM_CONTEXT_TERNARY 

a ternary expression

PM_CONTEXT_UNLESS 

an unless statement

PM_CONTEXT_UNTIL 

an until statement

PM_CONTEXT_WHILE 

a while statement