| |
| # The following YAML grammar is LL(1) and is parsed by a recursive descent |
| # parser. |
| # |
| # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END |
| # implicit_document ::= block_node DOCUMENT-END* |
| # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* |
| # block_node_or_indentless_sequence ::= |
| # ALIAS |
| # | properties (block_content | indentless_sequence)? |
| # | block_content |
| # | indentless_sequence |
| # block_node ::= ALIAS |
| # | properties block_content? |
| # | block_content |
| # flow_node ::= ALIAS |
| # | properties flow_content? |
| # | flow_content |
| # properties ::= TAG ANCHOR? | ANCHOR TAG? |
| # block_content ::= block_collection | flow_collection | SCALAR |
| # flow_content ::= flow_collection | SCALAR |
| # block_collection ::= block_sequence | block_mapping |
| # flow_collection ::= flow_sequence | flow_mapping |
| # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END |
| # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ |
| # block_mapping ::= BLOCK-MAPPING-START |
| # ((KEY block_node_or_indentless_sequence?)? |
| # (VALUE block_node_or_indentless_sequence?)?)* |
| # BLOCK-END |
| # flow_sequence ::= FLOW-SEQUENCE-START |
| # (flow_sequence_entry FLOW-ENTRY)* |
| # flow_sequence_entry? |
| # FLOW-SEQUENCE-END |
| # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? |
| # flow_mapping ::= FLOW-MAPPING-START |
| # (flow_mapping_entry FLOW-ENTRY)* |
| # flow_mapping_entry? |
| # FLOW-MAPPING-END |
| # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? |
| # |
| # FIRST sets: |
| # |
| # stream: { STREAM-START } |
| # explicit_document: { DIRECTIVE DOCUMENT-START } |
| # implicit_document: FIRST(block_node) |
| # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } |
| # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } |
| # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } |
| # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } |
| # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } |
| # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } |
| # block_sequence: { BLOCK-SEQUENCE-START } |
| # block_mapping: { BLOCK-MAPPING-START } |
| # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } |
| # indentless_sequence: { BLOCK-ENTRY } |
| # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } |
| # flow_sequence: { FLOW-SEQUENCE-START } |
| # flow_mapping: { FLOW-MAPPING-START } |
| # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } |
| # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } |
| |
| __all__ = ['Parser', 'ParserError'] |
| |
| from error import MarkedYAMLError |
| from tokens import * |
| from events import * |
| from scanner import * |
| |
class ParserError(MarkedYAMLError):
    """Error raised by the parser when it meets a token it cannot accept."""
| |
| class Parser(object): |
| # Since writing a recursive-descent parser is a straightforward task, we |
| # do not give many comments here. |
| |
# Tag handles that are always defined: implicit documents use exactly this
# table, and explicit documents fall back to it for any handle that their
# own TAG directives did not define.
DEFAULT_TAGS = {
    u'!':  u'!',                    # primary handle
    u'!!': u'tag:yaml.org,2002:',   # secondary handle
}
| |
| def __init__(self): |
| self.current_event = None |
| self.yaml_version = None |
| self.tag_handles = {} |
| self.states = [] |
| self.marks = [] |
| self.state = self.parse_stream_start |
| |
| def dispose(self): |
| # Reset the state attributes (to clear self-references) |
| self.states = [] |
| self.state = None |
| |
| def check_event(self, *choices): |
| # Check the type of the next event. |
| if self.current_event is None: |
| if self.state: |
| self.current_event = self.state() |
| if self.current_event is not None: |
| if not choices: |
| return True |
| for choice in choices: |
| if isinstance(self.current_event, choice): |
| return True |
| return False |
| |
| def peek_event(self): |
| # Get the next event. |
| if self.current_event is None: |
| if self.state: |
| self.current_event = self.state() |
| return self.current_event |
| |
| def get_event(self): |
| # Get the next event and proceed further. |
| if self.current_event is None: |
| if self.state: |
| self.current_event = self.state() |
| value = self.current_event |
| self.current_event = None |
| return value |
| |
| # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END |
| # implicit_document ::= block_node DOCUMENT-END* |
| # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* |
| |
def parse_stream_start(self):
    """Consume the first token and turn it into a StreamStartEvent."""
    first = self.get_token()
    stream_start = StreamStartEvent(first.start_mark, first.end_mark,
            encoding=first.encoding)

    # The stream may open with an implicit document, so try that next.
    self.state = self.parse_implicit_document_start
    return stream_start
| |
def parse_implicit_document_start(self):
    """Start a document that has no directives and no document start marker.

    If the next token is a directive, a document start or the stream end,
    there is no implicit document and the work is delegated to
    parse_document_start().  Otherwise a zero-width, non-explicit
    DocumentStartEvent is returned and the parser is set up to read the
    root block node and then the document end.
    """
    if self.check_token(DirectiveToken, DocumentStartToken,
            StreamEndToken):
        return self.parse_document_start()

    # Give the document its own handle table.  Assigning DEFAULT_TAGS
    # itself would share the class-level dictionary, so any in-place
    # change to tag_handles would leak into every other parser.
    self.tag_handles = self.DEFAULT_TAGS.copy()

    # The event is empty: it starts and ends where the content begins.
    token = self.peek_token()
    start_mark = end_mark = token.start_mark
    event = DocumentStartEvent(start_mark, end_mark,
            explicit=False)

    # Prepare the next state: the root node, then the document end.
    self.states.append(self.parse_document_end)
    self.state = self.parse_block_node

    return event
| |
def parse_document_start(self):
    """Return the start of an explicit document or the end of the stream.

    Raises ParserError when the directives are not followed by a document
    start token.
    """
    # Skip any extra document end indicators.
    while self.check_token(DocumentEndToken):
        self.get_token()

    if self.check_token(StreamEndToken):
        # Nothing is left: close the stream and stop the state machine.
        last = self.get_token()
        event = StreamEndEvent(last.start_mark, last.end_mark)
        assert not self.states
        assert not self.marks
        self.state = None
        return event

    # An explicit document: directives, then the document start marker.
    start_mark = self.peek_token().start_mark
    version, tags = self.process_directives()
    if not self.check_token(DocumentStartToken):
        unexpected = self.peek_token()
        raise ParserError(None, None,
                "expected '<document start>', but found %r"
                % unexpected.id,
                unexpected.start_mark)
    end_mark = self.get_token().end_mark
    event = DocumentStartEvent(start_mark, end_mark,
            explicit=True, version=version, tags=tags)
    self.states.append(self.parse_document_end)
    self.state = self.parse_document_content
    return event
| |
def parse_document_end(self):
    """Return a DocumentEndEvent, consuming a document end token if present.

    Without such a token the event is zero-width and not explicit.
    """
    start_mark = self.peek_token().start_mark
    if self.check_token(DocumentEndToken):
        end_mark = self.get_token().end_mark
        explicit = True
    else:
        end_mark = start_mark
        explicit = False
    event = DocumentEndEvent(start_mark, end_mark,
            explicit=explicit)

    # Another document, or the end of the stream, comes next.
    self.state = self.parse_document_start
    return event
| |
def parse_document_content(self):
    """Parse the root node of an explicit document.

    When the document has no content (the next token already belongs to
    the document boundary or ends the stream), an empty scalar is returned
    and the saved state is resumed.
    """
    if not self.check_token(DirectiveToken,
            DocumentStartToken, DocumentEndToken, StreamEndToken):
        return self.parse_block_node()
    empty = self.process_empty_scalar(self.peek_token().start_mark)
    self.state = self.states.pop()
    return empty
| |
def process_directives(self):
    """Consume the directive tokens that precede a document.

    Resets and then fills self.yaml_version and self.tag_handles.  Returns
    a pair (version, tags) where tags is a copy of the handles defined by
    the directives, or None if there were none; the default handles are
    added to self.tag_handles only after that copy is taken.

    Raises ParserError for a repeated YAML directive, a major version other
    than 1, or a tag handle that is defined twice.
    """
    self.yaml_version = None
    self.tag_handles = {}
    while self.check_token(DirectiveToken):
        directive = self.get_token()
        if directive.name == u'YAML':
            if self.yaml_version is not None:
                raise ParserError(None, None,
                        "found duplicate YAML directive",
                        directive.start_mark)
            # Only the major number is checked; unpacking still requires
            # the value to be a pair.
            major, _minor = directive.value
            if major != 1:
                raise ParserError(None, None,
                        "found incompatible YAML document (version 1.* is required)",
                        directive.start_mark)
            self.yaml_version = directive.value
        elif directive.name == u'TAG':
            handle, prefix = directive.value
            if handle in self.tag_handles:
                raise ParserError(None, None,
                        "duplicate tag handle %r" % handle.encode('utf-8'),
                        directive.start_mark)
            self.tag_handles[handle] = prefix

    # Snapshot the explicitly declared handles before adding the defaults.
    declared = self.tag_handles.copy() if self.tag_handles else None
    result = self.yaml_version, declared
    for handle, prefix in self.DEFAULT_TAGS.items():
        self.tag_handles.setdefault(handle, prefix)
    return result
| |
| # block_node_or_indentless_sequence ::= ALIAS |
| # | properties (block_content | indentless_sequence)? |
| # | block_content |
| # | indentless_sequence |
| # block_node ::= ALIAS |
| # | properties block_content? |
| # | block_content |
| # flow_node ::= ALIAS |
| # | properties flow_content? |
| # | flow_content |
| # properties ::= TAG ANCHOR? | ANCHOR TAG? |
| # block_content ::= block_collection | flow_collection | SCALAR |
| # flow_content ::= flow_collection | SCALAR |
| # block_collection ::= block_sequence | block_mapping |
| # flow_collection ::= flow_sequence | flow_mapping |
| |
def parse_block_node(self):
    """Parse a node that may have block collections as its content."""
    return self.parse_node(block=True, indentless_sequence=False)
| |
def parse_flow_node(self):
    """Parse a node whose content is limited to scalars and flow collections."""
    return self.parse_node(block=False, indentless_sequence=False)
| |
def parse_block_node_or_indentless_sequence(self):
    """Parse a block node, also accepting a sequence that starts directly
    with a block entry token."""
    allow_indentless = True
    return self.parse_node(block=True, indentless_sequence=allow_indentless)
| |
def parse_node(self, block=False, indentless_sequence=False):
    """Parse one node: an alias, or optional properties plus content.

    block -- also accept block sequences and block mappings as content.
    indentless_sequence -- also accept a sequence that starts directly
        with a block entry token.

    Returns the alias event, the scalar event, or the start event of the
    collection; for collections the start token itself is left for the
    "first entry"/"first key" state to consume.

    Raises ParserError when a tag uses an undefined handle, or when there
    is neither content nor an anchor/tag to attach an empty scalar to.
    """
    # An alias is a complete node on its own.
    if self.check_token(AliasToken):
        alias = self.get_token()
        event = AliasEvent(alias.value, alias.start_mark, alias.end_mark)
        self.state = self.states.pop()
        return event

    # properties ::= TAG ANCHOR? | ANCHOR TAG?
    anchor = tag = None
    start_mark = end_mark = tag_mark = None
    if self.check_token(AnchorToken):
        token = self.get_token()
        start_mark, end_mark = token.start_mark, token.end_mark
        anchor = token.value
        if self.check_token(TagToken):
            token = self.get_token()
            tag_mark, end_mark = token.start_mark, token.end_mark
            tag = token.value
    elif self.check_token(TagToken):
        token = self.get_token()
        start_mark = tag_mark = token.start_mark
        end_mark = token.end_mark
        tag = token.value
        if self.check_token(AnchorToken):
            token = self.get_token()
            end_mark = token.end_mark
            anchor = token.value

    # Expand the (handle, suffix) pair of the tag token into a full tag.
    if tag is not None:
        handle, suffix = tag
        if handle is None:
            tag = suffix
        elif handle in self.tag_handles:
            tag = self.tag_handles[handle]+suffix
        else:
            raise ParserError("while parsing a node", start_mark,
                    "found undefined tag handle %r" % handle.encode('utf-8'),
                    tag_mark)

    # Without properties the node begins where its content begins.
    if start_mark is None:
        start_mark = end_mark = self.peek_token().start_mark
    implicit = (tag is None or tag == u'!')

    if indentless_sequence and self.check_token(BlockEntryToken):
        end_mark = self.peek_token().end_mark
        event = SequenceStartEvent(anchor, tag, implicit,
                start_mark, end_mark)
        self.state = self.parse_indentless_sequence_entry
        return event

    if self.check_token(ScalarToken):
        scalar = self.get_token()
        end_mark = scalar.end_mark
        # For scalars the implicit flag is a pair of booleans.
        if (scalar.plain and tag is None) or tag == u'!':
            implicit = (True, False)
        elif tag is None:
            implicit = (False, True)
        else:
            implicit = (False, False)
        event = ScalarEvent(anchor, tag, implicit, scalar.value,
                start_mark, end_mark, style=scalar.style)
        self.state = self.states.pop()
        return event

    if self.check_token(FlowSequenceStartToken):
        end_mark = self.peek_token().end_mark
        event = SequenceStartEvent(anchor, tag, implicit,
                start_mark, end_mark, flow_style=True)
        self.state = self.parse_flow_sequence_first_entry
        return event

    if self.check_token(FlowMappingStartToken):
        end_mark = self.peek_token().end_mark
        event = MappingStartEvent(anchor, tag, implicit,
                start_mark, end_mark, flow_style=True)
        self.state = self.parse_flow_mapping_first_key
        return event

    if block and self.check_token(BlockSequenceStartToken):
        end_mark = self.peek_token().start_mark
        event = SequenceStartEvent(anchor, tag, implicit,
                start_mark, end_mark, flow_style=False)
        self.state = self.parse_block_sequence_first_entry
        return event

    if block and self.check_token(BlockMappingStartToken):
        end_mark = self.peek_token().start_mark
        event = MappingStartEvent(anchor, tag, implicit,
                start_mark, end_mark, flow_style=False)
        self.state = self.parse_block_mapping_first_key
        return event

    if anchor is not None or tag is not None:
        # Empty scalars are allowed even if a tag or an anchor is
        # specified.
        event = ScalarEvent(anchor, tag, (implicit, False), u'',
                start_mark, end_mark)
        self.state = self.states.pop()
        return event

    # No properties and no content: this is not a node at all.
    node = 'block' if block else 'flow'
    unexpected = self.peek_token()
    raise ParserError("while parsing a %s node" % node, start_mark,
            "expected the node content, but found %r" % unexpected.id,
            unexpected.start_mark)
| |
| # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END |
| |
def parse_block_sequence_first_entry(self):
    """Consume the block sequence start token, remember where the sequence
    began, and parse its first entry."""
    self.marks.append(self.get_token().start_mark)
    return self.parse_block_sequence_entry()
| |
def parse_block_sequence_entry(self):
    """Parse the next entry of a block sequence, or the end of the sequence.

    Raises ParserError if the next token is neither a block entry nor a
    block end.
    """
    if self.check_token(BlockEntryToken):
        entry = self.get_token()
        if self.check_token(BlockEntryToken, BlockEndToken):
            # The entry has no content: emit an empty scalar right after it.
            self.state = self.parse_block_sequence_entry
            return self.process_empty_scalar(entry.end_mark)
        self.states.append(self.parse_block_sequence_entry)
        return self.parse_block_node()

    if not self.check_token(BlockEndToken):
        unexpected = self.peek_token()
        raise ParserError("while parsing a block collection", self.marks[-1],
                "expected <block end>, but found %r" % unexpected.id,
                unexpected.start_mark)

    block_end = self.get_token()
    event = SequenceEndEvent(block_end.start_mark, block_end.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
| |
| # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ |
| |
def parse_indentless_sequence_entry(self):
    """Parse the next entry of an indentless sequence, or end the sequence.

    No token closes such a sequence: it ends at the first token that is
    not a block entry, and the end event is zero-width at that token.
    """
    if not self.check_token(BlockEntryToken):
        mark = self.peek_token().start_mark
        event = SequenceEndEvent(mark, mark)
        self.state = self.states.pop()
        return event

    entry = self.get_token()
    if self.check_token(BlockEntryToken,
            KeyToken, ValueToken, BlockEndToken):
        # The entry has no content: emit an empty scalar right after it.
        self.state = self.parse_indentless_sequence_entry
        return self.process_empty_scalar(entry.end_mark)
    self.states.append(self.parse_indentless_sequence_entry)
    return self.parse_block_node()
| |
| # block_mapping ::= BLOCK-MAPPING-START |
| # ((KEY block_node_or_indentless_sequence?)? |
| # (VALUE block_node_or_indentless_sequence?)?)* |
| # BLOCK-END |
| |
def parse_block_mapping_first_key(self):
    """Consume the block mapping start token, remember where the mapping
    began, and parse its first key."""
    self.marks.append(self.get_token().start_mark)
    return self.parse_block_mapping_key()
| |
def parse_block_mapping_key(self):
    """Parse the next key of a block mapping, or the end of the mapping.

    Raises ParserError if the next token is neither a key nor a block end.
    """
    if self.check_token(KeyToken):
        key = self.get_token()
        if self.check_token(KeyToken, ValueToken, BlockEndToken):
            # The key has no content: emit an empty scalar right after it.
            self.state = self.parse_block_mapping_value
            return self.process_empty_scalar(key.end_mark)
        self.states.append(self.parse_block_mapping_value)
        return self.parse_block_node_or_indentless_sequence()

    if not self.check_token(BlockEndToken):
        unexpected = self.peek_token()
        raise ParserError("while parsing a block mapping", self.marks[-1],
                "expected <block end>, but found %r" % unexpected.id,
                unexpected.start_mark)

    block_end = self.get_token()
    event = MappingEndEvent(block_end.start_mark, block_end.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
| |
def parse_block_mapping_value(self):
    """Parse the value that belongs to the key just parsed.

    A missing value token, or a value token with no content after it,
    yields an empty scalar.
    """
    if not self.check_token(ValueToken):
        self.state = self.parse_block_mapping_key
        return self.process_empty_scalar(self.peek_token().start_mark)

    indicator = self.get_token()
    if self.check_token(KeyToken, ValueToken, BlockEndToken):
        self.state = self.parse_block_mapping_key
        return self.process_empty_scalar(indicator.end_mark)
    self.states.append(self.parse_block_mapping_key)
    return self.parse_block_node_or_indentless_sequence()
| |
| # flow_sequence ::= FLOW-SEQUENCE-START |
| # (flow_sequence_entry FLOW-ENTRY)* |
| # flow_sequence_entry? |
| # FLOW-SEQUENCE-END |
| # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? |
| # |
| # Note that while production rules for both flow_sequence_entry and |
| # flow_mapping_entry are equal, their interpretations are different. |
| # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` |
| # generates an inline mapping (set syntax). |
| |
def parse_flow_sequence_first_entry(self):
    """Consume the flow sequence start token, remember where the sequence
    began, and parse its first entry."""
    self.marks.append(self.get_token().start_mark)
    return self.parse_flow_sequence_entry(first=True)
| |
def parse_flow_sequence_entry(self, first=False):
    """Parse the next entry of a flow sequence, or the end of the sequence.

    first -- true for the entry right after the opening token, which is
        the only one that is not preceded by a flow entry token.

    An entry that starts with a key token opens an inline mapping; its
    MappingStartEvent is returned here and the key token is left for
    parse_flow_sequence_entry_mapping_key().

    Raises ParserError when an entry other than the first is not preceded
    by a flow entry token.
    """
    if not self.check_token(FlowSequenceEndToken):
        if not first:
            if not self.check_token(FlowEntryToken):
                unexpected = self.peek_token()
                raise ParserError("while parsing a flow sequence", self.marks[-1],
                        "expected ',' or ']', but got %r" % unexpected.id,
                        unexpected.start_mark)
            self.get_token()

        if self.check_token(KeyToken):
            key = self.peek_token()
            event = MappingStartEvent(None, None, True,
                    key.start_mark, key.end_mark,
                    flow_style=True)
            self.state = self.parse_flow_sequence_entry_mapping_key
            return event
        if not self.check_token(FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry)
            return self.parse_flow_node()

    # Only the closing token is left, possibly after a trailing separator.
    closing = self.get_token()
    event = SequenceEndEvent(closing.start_mark, closing.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
| |
def parse_flow_sequence_entry_mapping_key(self):
    """Consume the key token of an inline mapping and parse its key.

    A key with no content yields an empty scalar.
    """
    key = self.get_token()
    if self.check_token(ValueToken,
            FlowEntryToken, FlowSequenceEndToken):
        self.state = self.parse_flow_sequence_entry_mapping_value
        return self.process_empty_scalar(key.end_mark)
    self.states.append(self.parse_flow_sequence_entry_mapping_value)
    return self.parse_flow_node()
| |
def parse_flow_sequence_entry_mapping_value(self):
    """Parse the value of an inline mapping inside a flow sequence.

    A missing value token, or a value token with no content after it,
    yields an empty scalar.
    """
    if not self.check_token(ValueToken):
        self.state = self.parse_flow_sequence_entry_mapping_end
        return self.process_empty_scalar(self.peek_token().start_mark)

    indicator = self.get_token()
    if self.check_token(FlowEntryToken, FlowSequenceEndToken):
        self.state = self.parse_flow_sequence_entry_mapping_end
        return self.process_empty_scalar(indicator.end_mark)
    self.states.append(self.parse_flow_sequence_entry_mapping_end)
    return self.parse_flow_node()
| |
def parse_flow_sequence_entry_mapping_end(self):
    """Close an inline mapping with a zero-width MappingEndEvent and go
    back to parsing the entries of the enclosing flow sequence."""
    self.state = self.parse_flow_sequence_entry
    mark = self.peek_token().start_mark
    return MappingEndEvent(mark, mark)
| |
| # flow_mapping ::= FLOW-MAPPING-START |
| # (flow_mapping_entry FLOW-ENTRY)* |
| # flow_mapping_entry? |
| # FLOW-MAPPING-END |
| # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? |
| |
def parse_flow_mapping_first_key(self):
    """Consume the flow mapping start token, remember where the mapping
    began, and parse its first key."""
    self.marks.append(self.get_token().start_mark)
    return self.parse_flow_mapping_key(first=True)
| |
def parse_flow_mapping_key(self, first=False):
    """Parse the next key of a flow mapping, or the end of the mapping.

    first -- true for the entry right after the opening token, which is
        the only one that is not preceded by a flow entry token.

    An entry without a key token is parsed as a key whose value is an
    empty scalar.

    Raises ParserError when an entry other than the first is not preceded
    by a flow entry token.
    """
    if not self.check_token(FlowMappingEndToken):
        if not first:
            if not self.check_token(FlowEntryToken):
                unexpected = self.peek_token()
                raise ParserError("while parsing a flow mapping", self.marks[-1],
                        "expected ',' or '}', but got %r" % unexpected.id,
                        unexpected.start_mark)
            self.get_token()

        if self.check_token(KeyToken):
            key = self.get_token()
            if self.check_token(ValueToken,
                    FlowEntryToken, FlowMappingEndToken):
                # The key has no content: emit an empty scalar after it.
                self.state = self.parse_flow_mapping_value
                return self.process_empty_scalar(key.end_mark)
            self.states.append(self.parse_flow_mapping_value)
            return self.parse_flow_node()
        if not self.check_token(FlowMappingEndToken):
            self.states.append(self.parse_flow_mapping_empty_value)
            return self.parse_flow_node()

    # Only the closing token is left, possibly after a trailing separator.
    closing = self.get_token()
    event = MappingEndEvent(closing.start_mark, closing.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
| |
def parse_flow_mapping_value(self):
    """Parse the value that belongs to the flow mapping key just parsed.

    A missing value token, or a value token with no content after it,
    yields an empty scalar.
    """
    if not self.check_token(ValueToken):
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(self.peek_token().start_mark)

    indicator = self.get_token()
    if self.check_token(FlowEntryToken, FlowMappingEndToken):
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(indicator.end_mark)
    self.states.append(self.parse_flow_mapping_key)
    return self.parse_flow_node()
| |
def parse_flow_mapping_empty_value(self):
    """Return the empty scalar used as the value of a flow mapping entry
    that was given without a key token, then continue with the next key."""
    self.state = self.parse_flow_mapping_key
    here = self.peek_token().start_mark
    return self.process_empty_scalar(here)
| |
def process_empty_scalar(self, mark):
    """Build a zero-width ScalarEvent with an empty value at *mark*.

    The event has no anchor and no tag, and its implicit pair is
    (True, False).
    """
    implicit = (True, False)
    return ScalarEvent(None, None, implicit, u'', mark, mark)
| |