Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
emitter.py
Go to the documentation of this file.
1 # (c) Copyright Rosetta Commons Member Institutions.
2 # (c) This file is part of the Rosetta software suite and is made available under license.
3 # (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
4 # (c) For more information, see http://www.rosettacommons.org. Questions about this can be
5 # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
6 
7 # Emitter expects events obeying the following grammar:
8 # stream ::= STREAM-START document* STREAM-END
9 # document ::= DOCUMENT-START node DOCUMENT-END
10 # node ::= SCALAR | sequence | mapping
11 # sequence ::= SEQUENCE-START node* SEQUENCE-END
12 # mapping ::= MAPPING-START (node node)* MAPPING-END
13 
14 __all__ = ['Emitter', 'EmitterError']
15 
16 from error import YAMLError
17 from events import *
18 
19 import re
20 
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid input."""
23 
class ScalarAnalysis(object):
    """Plain record holding the outcome of analysing one scalar value.

    It stores the scalar text, whether it is empty or multiline, and one
    ``allow_*`` flag per output style the scalar may be written in.
    """

    def __init__(self, scalar, empty, multiline,
                 allow_flow_plain, allow_block_plain,
                 allow_single_quoted, allow_double_quoted,
                 allow_block):
        # The analysed text and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Plain (unquoted) styles, inside and outside flow collections.
        self.allow_flow_plain, self.allow_block_plain = \
            allow_flow_plain, allow_block_plain
        # Quoted styles.
        self.allow_single_quoted, self.allow_double_quoted = \
            allow_single_quoted, allow_double_quoted
        # Block (literal / folded) styles.
        self.allow_block = allow_block
37 
38 class Emitter(object):
39 
# Maps each built-in tag prefix to the handle used to abbreviate it.
# A copy becomes self.tag_prefixes at the start of every document.
DEFAULT_TAG_PREFIXES = {
    u'!' : u'!',
    u'tag:yaml.org,2002:' : u'!!',
}
44 
def __init__(self, stream, canonical=None, indent=None, width=None,
             allow_unicode=None, line_break=None):
    """Set up the emitter state machine and its formatting options.

    stream        -- output object; must have `write` and possibly `flush`.
    canonical     -- when true, the canonical output form is used.
    indent        -- preferred indent; honoured only if 1 < indent < 10,
                     otherwise 2 is used.
    width         -- preferred line width; honoured only if it exceeds
                     twice the chosen indent, otherwise 80 is used.
    allow_unicode -- when true, non-ASCII characters may be written
                     unescaped.
    line_break    -- u'\\r', u'\\n' or u'\\r\\n'; anything else falls back
                     to u'\\n'.
    """

    # The stream should have the methods `write` and possibly `flush`.
    self.stream = stream

    # Encoding can be overridden by STREAM-START.
    self.encoding = None

    # Emitter is a state machine with a stack of states to handle nested
    # structures.
    self.states = []
    # emit() calls self.state() for every event, so the machine needs an
    # initial state before the first event arrives.
    self.state = self.expect_stream_start

    # Current event and the event queue.
    self.events = []
    self.event = None

    # The current indentation level and the stack of previous indents.
    self.indents = []
    self.indent = None

    # Flow level.
    self.flow_level = 0

    # Contexts.
    self.root_context = False
    self.sequence_context = False
    self.mapping_context = False
    self.simple_key_context = False

    # Characteristics of the last emitted character:
    #  - current position.
    #  - is it a whitespace?
    #  - is it an indention character
    #    (indentation space, '-', '?', or ':')?
    self.line = 0
    self.column = 0
    self.whitespace = True
    self.indention = True

    # Formatting details.
    self.canonical = canonical
    self.allow_unicode = allow_unicode
    self.best_indent = 2
    if indent and 1 < indent < 10:
        self.best_indent = indent
    self.best_width = 80
    if width and width > self.best_indent*2:
        self.best_width = width
    self.best_line_break = u'\n'
    if line_break in [u'\r', u'\n', u'\r\n']:
        self.best_line_break = line_break

    # Tag prefixes.
    self.tag_prefixes = None

    # Prepared anchor and tag.
    self.prepared_anchor = None
    self.prepared_tag = None

    # Scalar analysis and style.
    self.analysis = None
    self.style = None
109 
def emit(self, event):
    """Queue `event`, then run the state machine on every ready event.

    Events stay queued while need_more_events() reports that more
    look-ahead is required.
    """
    self.events.append(event)
    while True:
        if self.need_more_events():
            break
        # Expose the oldest queued event to the current state handler.
        self.event = self.events.pop(0)
        self.state()
        self.event = None
116 
117  # In some cases, we wait for a few next events before emitting.
118 
def need_more_events(self):
    """Return True when the queue lacks the look-ahead needed to proceed.

    An empty queue always needs more.  A document, sequence or mapping
    start at the head of the queue asks need_events() for 1, 2 or 3
    following events respectively; any other event is ready at once.
    """
    if not self.events:
        return True
    head = self.events[0]
    lookahead_by_kind = (
        (DocumentStartEvent, 1),
        (SequenceStartEvent, 2),
        (MappingStartEvent, 3),
    )
    for kind, lookahead in lookahead_by_kind:
        if isinstance(head, kind):
            return self.need_events(lookahead)
    return False
131 
def need_events(self, count):
    """Tell whether more than the queued events are needed.

    Scans the events after the head while tracking nesting depth.  As
    soon as the depth drops below zero (the head's structure is closed,
    or the stream ends) no more events are needed.  Otherwise more are
    needed while fewer than `count` events follow the head.
    """
    depth = 0
    for queued in self.events[1:]:
        if isinstance(queued, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(queued, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(queued, StreamEndEvent):
            depth = -1
        if depth < 0:
            return False
    return len(self.events) <= count
144 
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent on the stack and compute the new one.

    With no indent yet, the new indent is `best_indent` in flow context
    and 0 otherwise.  With an existing indent, it grows by `best_indent`
    unless `indentless` is set.
    """
    previous = self.indent
    self.indents.append(previous)
    if previous is None:
        self.indent = self.best_indent if flow else 0
        return
    if indentless:
        return
    self.indent = previous + self.best_indent
154 
155  # States.
156 
157  # Stream handlers.
158 
160  if isinstance(self.event, StreamStartEvent):
161  if self.event.encoding:
162  self.encoding = self.event.encoding
163  self.write_stream_start()
165  else:
166  raise EmitterError("expected StreamStartEvent, but got %s"
167  % self.event)
168 
def expect_nothing(self):
    """State handler that rejects every event with an EmitterError."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
171 
172  # Document handlers.
173 
175  return self.expect_document_start(first=True)
176 
def expect_document_start(self, first=False):
    """Handle the event that opens a document or ends the stream.

    For a DocumentStartEvent: write the version directive if a version
    is given, reset the tag prefixes to the defaults, write one TAG
    directive per declared handle (in sorted handle order), write the
    '---' marker unless the document may start implicitly, and move to
    expect_document_root.  For a StreamEndEvent: finish the stream and
    move to expect_nothing.

    first -- True when this is the first document of the stream; only
             the first document may omit the '---' marker.

    Raises EmitterError for any other event.
    """
    if isinstance(self.event, DocumentStartEvent):
        if self.event.version:
            version_text = self.prepare_version(self.event.version)
            self.write_version_directive(version_text)
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if self.event.tags:
            # sorted() gives the same order as keys() + sort() without
            # requiring keys() to return a list.
            for handle in sorted(self.event.tags):
                prefix = self.event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        # The marker can be left out only for a first, non-explicit
        # document with no directives that is not empty.
        implicit = (first and not self.event.explicit and not self.canonical
                    and not self.event.version and not self.event.tags
                    and not self.check_empty_document())
        if not implicit:
            self.write_indent()
            self.write_indicator(u'---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
    elif isinstance(self.event, StreamEndEvent):
        self.write_stream_end()
        self.state = self.expect_nothing
    else:
        raise EmitterError("expected DocumentStartEvent, but got %s"
                           % self.event)
207 
209  if isinstance(self.event, DocumentEndEvent):
210  self.write_indent()
211  if self.event.explicit:
212  self.write_indicator(u'...', True)
213  self.write_indent()
214  self.flush_stream()
215  self.state = self.expect_document_start
216  else:
217  raise EmitterError("expected DocumentEndEvent, but got %s"
218  % self.event)
219 
221  self.states.append(self.expect_document_end)
222  self.expect_node(root=True)
223 
224  # Node handlers.
225 
def expect_node(self, root=False, sequence=False, mapping=False,
                simple_key=False):
    """Record the node's context and dispatch on the current event.

    Aliases go to expect_alias.  Scalars and collection starts first get
    their anchor and tag written, then go to the scalar handler or to
    the flow/block handler of the collection.  Flow style is chosen
    inside a flow collection, in canonical mode, when the event asks for
    it, or when the collection is empty.

    Raises EmitterError for any other event.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    if isinstance(self.event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor(u'&')
    self.process_tag()
    if isinstance(self.event, ScalarEvent):
        self.expect_scalar()
        return
    if isinstance(self.event, SequenceStartEvent):
        if self.flow_level or self.canonical or self.event.flow_style \
                or self.check_empty_sequence():
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
        return
    if isinstance(self.event, MappingStartEvent):
        if self.flow_level or self.canonical or self.event.flow_style \
                or self.check_empty_mapping():
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
253 
def expect_alias(self):
    """Write the alias for the current event and return to the saved state.

    Raises EmitterError when the event carries no anchor.
    """
    anchor = self.event.anchor
    if anchor is None:
        raise EmitterError("anchor is not specified for alias")
    self.process_anchor(u'*')
    # Resume the state that was pushed before this node was expected.
    self.state = self.states.pop()
259 
def expect_scalar(self):
    """Write the current scalar, then restore the indent and saved state."""
    # The scalar is written one flow-style indent level deeper.
    self.increase_indent(flow=True)
    self.process_scalar()
    # Undo the temporary indent.
    self.indent = self.indents.pop()
    # Resume the state that was pushed before this node was expected.
    self.state = self.states.pop()
265 
266  # Flow sequence handlers.
267 
269  self.write_indicator(u'[', True, whitespace=True)
270  self.flow_level += 1
271  self.increase_indent(flow=True)
273 
275  if isinstance(self.event, SequenceEndEvent):
276  self.indent = self.indents.pop()
277  self.flow_level -= 1
278  self.write_indicator(u']', False)
279  self.state = self.states.pop()
280  else:
281  if self.canonical or self.column > self.best_width:
282  self.write_indent()
283  self.states.append(self.expect_flow_sequence_item)
284  self.expect_node(sequence=True)
285 
287  if isinstance(self.event, SequenceEndEvent):
288  self.indent = self.indents.pop()
289  self.flow_level -= 1
290  if self.canonical:
291  self.write_indicator(u',', False)
292  self.write_indent()
293  self.write_indicator(u']', False)
294  self.state = self.states.pop()
295  else:
296  self.write_indicator(u',', False)
297  if self.canonical or self.column > self.best_width:
298  self.write_indent()
299  self.states.append(self.expect_flow_sequence_item)
300  self.expect_node(sequence=True)
301 
302  # Flow mapping handlers.
303 
305  self.write_indicator(u'{', True, whitespace=True)
306  self.flow_level += 1
307  self.increase_indent(flow=True)
309 
311  if isinstance(self.event, MappingEndEvent):
312  self.indent = self.indents.pop()
313  self.flow_level -= 1
314  self.write_indicator(u'}', False)
315  self.state = self.states.pop()
316  else:
317  if self.canonical or self.column > self.best_width:
318  self.write_indent()
319  if not self.canonical and self.check_simple_key():
320  self.states.append(self.expect_flow_mapping_simple_value)
321  self.expect_node(mapping=True, simple_key=True)
322  else:
323  self.write_indicator(u'?', True)
324  self.states.append(self.expect_flow_mapping_value)
325  self.expect_node(mapping=True)
326 
328  if isinstance(self.event, MappingEndEvent):
329  self.indent = self.indents.pop()
330  self.flow_level -= 1
331  if self.canonical:
332  self.write_indicator(u',', False)
333  self.write_indent()
334  self.write_indicator(u'}', False)
335  self.state = self.states.pop()
336  else:
337  self.write_indicator(u',', False)
338  if self.canonical or self.column > self.best_width:
339  self.write_indent()
340  if not self.canonical and self.check_simple_key():
341  self.states.append(self.expect_flow_mapping_simple_value)
342  self.expect_node(mapping=True, simple_key=True)
343  else:
344  self.write_indicator(u'?', True)
345  self.states.append(self.expect_flow_mapping_value)
346  self.expect_node(mapping=True)
347 
349  self.write_indicator(u':', False)
350  self.states.append(self.expect_flow_mapping_key)
351  self.expect_node(mapping=True)
352 
354  if self.canonical or self.column > self.best_width:
355  self.write_indent()
356  self.write_indicator(u':', True)
357  self.states.append(self.expect_flow_mapping_key)
358  self.expect_node(mapping=True)
359 
360  # Block sequence handlers.
361 
363  indentless = (self.mapping_context and not self.indention)
364  self.increase_indent(flow=False, indentless=indentless)
366 
368  return self.expect_block_sequence_item(first=True)
369 
def expect_block_sequence_item(self, first=False):
    """Emit one block sequence item, or close the sequence.

    Unless this is the first item, a SequenceEndEvent pops the indent
    and returns to the saved state.  Otherwise a '-' indicator is
    written on a fresh indent and the item node is expected, with this
    handler pushed as the state to come back to.
    """
    finished = (not first) and isinstance(self.event, SequenceEndEvent)
    if finished:
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    self.write_indicator(u'-', True, indention=True)
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)
379 
380  # Block mapping handlers.
381 
383  self.increase_indent(flow=False)
385 
387  return self.expect_block_mapping_key(first=True)
388 
def expect_block_mapping_key(self, first=False):
    """Emit one block mapping key, or close the mapping.

    Unless this is the first key, a MappingEndEvent pops the indent and
    returns to the saved state.  Otherwise the key is written on a fresh
    indent: as a simple key when check_simple_key() allows it, else
    after an explicit '?' indicator.
    """
    finished = (not first) and isinstance(self.event, MappingEndEvent)
    if finished:
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if self.check_simple_key():
        self.states.append(self.expect_block_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True, indention=True)
    self.states.append(self.expect_block_mapping_value)
    self.expect_node(mapping=True)
402 
404  self.write_indicator(u':', False)
405  self.states.append(self.expect_block_mapping_key)
406  self.expect_node(mapping=True)
407 
409  self.write_indent()
410  self.write_indicator(u':', True, indention=True)
411  self.states.append(self.expect_block_mapping_key)
412  self.expect_node(mapping=True)
413 
414  # Checkers.
415 
417  return (isinstance(self.event, SequenceStartEvent) and self.events
418  and isinstance(self.events[0], SequenceEndEvent))
419 
421  return (isinstance(self.event, MappingStartEvent) and self.events
422  and isinstance(self.events[0], MappingEndEvent))
423 
425  if not isinstance(self.event, DocumentStartEvent) or not self.events:
426  return False
427  event = self.events[0]
428  return (isinstance(event, ScalarEvent) and event.anchor is None
429  and event.tag is None and event.implicit and event.value == u'')
430 
def check_simple_key(self):
    """Tell whether the current event can be written as a simple key.

    The prepared anchor, prepared tag and scalar text are computed (and
    cached on the emitter) as needed, and their lengths are summed.  The
    key is simple when that total is under 128 and the event is an
    alias, a non-empty single-line scalar, or an empty collection.
    """
    event = self.event
    length = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        length += len(self.prepared_anchor)
    has_tag = isinstance(event, (ScalarEvent, CollectionStartEvent)) \
        and event.tag is not None
    if has_tag:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        length += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        length += len(self.analysis.scalar)
    return (length < 128 and (isinstance(event, AliasEvent)
        or (isinstance(event, ScalarEvent)
            and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping()))
450 
451  # Anchor, Tag, and Scalar processors.
452 
def process_anchor(self, indicator):
    """Write the current event's anchor, prefixed with `indicator`.

    Nothing is written when the event has no anchor.  The cached
    prepared anchor is cleared in every case.
    """
    if self.event.anchor is None:
        self.prepared_anchor = None
        return
    # Reuse the anchor text if check_simple_key() already prepared it.
    if self.prepared_anchor is None:
        self.prepared_anchor = self.prepare_anchor(self.event.anchor)
    anchor_text = self.prepared_anchor
    if anchor_text:
        self.write_indicator(indicator+anchor_text, True)
    self.prepared_anchor = None
462 
def process_tag(self):
    """Write the current event's tag unless it may be left implicit.

    For scalars the style is chosen first (if not chosen yet), because
    the plain and non-plain styles consult different entries of
    `event.implicit`.  The cached prepared tag is cleared in every case.

    Raises EmitterError when a tag must be written but none is given.
    """
    tag = self.event.tag
    if isinstance(self.event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if (not self.canonical or tag is None) and (
                self.event.implicit[0] if self.style == ''
                else self.event.implicit[1]):
            self.prepared_tag = None
            return
        if self.event.implicit[0] and tag is None:
            # A non-plain scalar that would resolve implicitly as plain
            # gets the non-specific tag.
            tag = u'!'
            self.prepared_tag = None
    elif (not self.canonical or tag is None) and self.event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    tag_text = self.prepared_tag
    if tag_text:
        self.write_indicator(tag_text, True)
    self.prepared_tag = None
487 
489  if self.analysis is None:
490  self.analysis = self.analyze_scalar(self.event.value)
491  if self.event.style == '"' or self.canonical:
492  return '"'
493  if not self.event.style and self.event.implicit[0]:
494  if (not (self.simple_key_context and
495  (self.analysis.empty or self.analysis.multiline))
496  and (self.flow_level and self.analysis.allow_flow_plain
497  or (not self.flow_level and self.analysis.allow_block_plain))):
498  return ''
499  if self.event.style and self.event.style in '|>':
500  if (not self.flow_level and not self.simple_key_context
501  and self.analysis.allow_block):
502  return self.event.style
503  if not self.event.style or self.event.style == '\'':
504  if (self.analysis.allow_single_quoted and
505  not (self.simple_key_context and self.analysis.multiline)):
506  return '\''
507  return '"'
508 
def process_scalar(self):
    """Write the current scalar in its chosen style and reset the caches.

    The analysis and style are computed here unless already cached.
    Quoted and plain scalars may be split across lines only outside a
    simple-key context.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    split = (not self.simple_key_context)
    text = self.analysis.scalar
    style = self.style
    if style == '"':
        self.write_double_quoted(text, split)
    elif style == '\'':
        self.write_single_quoted(text, split)
    elif style == '>':
        self.write_folded(text)
    elif style == '|':
        self.write_literal(text)
    else:
        self.write_plain(text, split)
    self.analysis = None
    self.style = None
530 
531  # Analyzers.
532 
def prepare_version(self, version):
    """Return the `(major, minor)` pair as text, e.g. u'1.1'.

    Raises EmitterError when the major version is not 1.
    """
    major, minor = version
    if major == 1:
        return u'%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
538 
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain
    only ASCII letters, digits, '-' or '_' in between.

    Raises EmitterError otherwise.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if not (handle[0] == u'!' and handle[-1] == u'!'):
        raise EmitterError("tag handle must start and end with '!': %r"
                % (handle.encode('utf-8')))
    for ch in handle[1:-1]:
        is_word_char = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                        or u'a' <= ch <= 'z' or ch in u'-_')
        if is_word_char:
            continue
        raise EmitterError("invalid character %r in the tag handle: %r"
                % (ch.encode('utf-8'), handle.encode('utf-8')))
    return handle
551 
def prepare_tag_prefix(self, prefix):
    """Return `prefix` with every disallowed character percent-escaped.

    ASCII letters, digits and the characters ``-;/?!:@&=+$,_.~*'()[]``
    are copied through; a leading '!' is always kept.  Any other
    character is replaced by the %XX escapes of its UTF-8 bytes.

    Raises EmitterError when the prefix is empty.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    chunks = []
    start = end = 0
    if prefix[0] == u'!':
        end = 1
    while end < len(prefix):
        ch = prefix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-;/?!:@&=+$,_.~*\'()[]':
            end += 1
        else:
            # Flush the run of allowed characters seen so far.
            if start < end:
                chunks.append(prefix[start:end])
            start = end = end+1
            # bytearray yields integers whether the encoded value
            # iterates as characters or as ints, so no ord() is needed.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(prefix[start:end])
    return u''.join(chunks)
574 
def prepare_tag(self, tag):
    """Return the text to write for `tag`.

    The tag u'!' is returned as is.  Otherwise the tag is shortened with
    a handle from `self.tag_prefixes` when one of the prefixes matches;
    prefixes are tried in sorted order and the last match wins, so the
    result does not depend on dictionary ordering.  Characters that are
    not allowed in the remaining suffix are replaced by the %XX escapes
    of their UTF-8 bytes.  Without a matching prefix the verbatim form
    ``!<...>`` is returned.

    Raises EmitterError when the tag is empty.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == u'!':
        return tag
    handle = None
    suffix = tag
    for prefix in sorted(self.tag_prefixes):
        # A prefix other than '!' must leave a non-empty suffix.
        if tag.startswith(prefix) \
                and (prefix == u'!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    chunks = []
    start = end = 0
    while end < len(suffix):
        ch = suffix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-;/?:@&=+$,_.~*\'()[]' \
                or (ch == u'!' and handle != u'!'):
            end += 1
        else:
            # Flush the run of allowed characters seen so far.
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end+1
            # bytearray yields integers whether the encoded value
            # iterates as characters or as ints, so no ord() is needed.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = u''.join(chunks)
    if handle:
        return u'%s%s' % (handle, suffix_text)
    else:
        return u'!<%s>' % suffix_text
609 
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    The anchor must be non-empty and contain only ASCII letters, digits,
    '-' or '_'.

    Raises EmitterError otherwise.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        is_word_char = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                        or u'a' <= ch <= 'z' or ch in u'-_')
        if is_word_char:
            continue
        raise EmitterError("invalid character %r in the anchor: %r"
                % (ch.encode('utf-8'), anchor.encode('utf-8')))
    return anchor
619 
def analyze_scalar(self, scalar):
    """Scan `scalar` once and report which output styles can represent it.

    Returns a ScalarAnalysis holding the scalar, whether it is empty or
    contains line breaks, and one ``allow_*`` flag per style.  The only
    emitter setting consulted is `self.allow_unicode`: when it is false,
    printable non-ASCII characters count as special characters.
    """

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Whitespaces.
    inline_spaces = False          # non-space space+ non-space
    inline_breaks = False          # non-space break+ non-space
    leading_spaces = False         # ^ space+ (non-space | $)
    leading_breaks = False         # ^ break+ (non-space | $)
    trailing_spaces = False        # (^ | non-space) space+ $
    trailing_breaks = False        # (^ | non-space) break+ $
    inline_breaks_spaces = False   # non-space break+ space+ non-space
    mixed_breaks_spaces = False    # anything else

    # Check document indicators.
    if scalar.startswith(u'---') or scalar.startswith(u'...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceded_by_space = True

    # Last character or followed by a whitespace.
    followed_by_space = (len(scalar) == 1 or
            scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')

    # The current series of whitespaces contain plain spaces.
    spaces = False

    # The current series of whitespaces contain line breaks.
    breaks = False

    # The current series of whitespaces contain a space followed by a
    # break.
    mixed = False

    # The current series of whitespaces start at the beginning of the
    # scalar.
    leading = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.

        if index == 0:
            # Leading indicators are special characters.
            if ch in u'#,[]{}&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in u'?:':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'-' and followed_by_space:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in u',?[]{}':
                flow_indicators = True
            if ch == u':':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'#' and preceded_by_space:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.

        if ch in u'\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
            if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
                    or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
                # Printable non-ASCII: special only if unicode output
                # is not allowed.
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Spaces, line breaks, and how they are mixed. State machine.

        # Start or continue series of whitespaces.
        if ch in u' \n\x85\u2028\u2029':
            if spaces and breaks:
                if ch != u' ':      # break+ (space+ break+)    => mixed
                    mixed = True
            elif spaces:
                if ch != u' ':      # (space+ break+)   => mixed
                    breaks = True
                    mixed = True
            elif breaks:
                if ch == u' ':      # break+ space+
                    spaces = True
            else:
                leading = (index == 0)
                if ch == u' ':      # space+
                    spaces = True
                else:               # break+
                    breaks = True

        # Series of whitespaces ended with a non-space.
        elif spaces or breaks:
            if leading:
                if spaces and breaks:
                    mixed_breaks_spaces = True
                elif spaces:
                    leading_spaces = True
                elif breaks:
                    leading_breaks = True
            else:
                if mixed:
                    mixed_breaks_spaces = True
                elif spaces and breaks:
                    inline_breaks_spaces = True
                elif spaces:
                    inline_spaces = True
                elif breaks:
                    inline_breaks = True
            spaces = breaks = mixed = leading = False

        # Series of whitespaces reach the end.
        if (spaces or breaks) and (index == len(scalar)-1):
            if spaces and breaks:
                mixed_breaks_spaces = True
            elif spaces:
                trailing_spaces = True
                if leading:
                    leading_spaces = True
            elif breaks:
                trailing_breaks = True
                if leading:
                    leading_breaks = True
            spaces = breaks = mixed = leading = False

        # Prepare for the next character.
        index += 1
        preceded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
        followed_by_space = (index+1 >= len(scalar) or
                scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespace are bad for plain scalars. We also
    # do not want to mess with leading whitespaces for block scalars.
    if leading_spaces or leading_breaks or trailing_spaces:
        allow_flow_plain = allow_block_plain = allow_block = False

    # Trailing breaks are fine for block scalars, but unacceptable for
    # plain scalars.
    if trailing_breaks:
        allow_flow_plain = allow_block_plain = False

    # The combination of (space+ break+) is only acceptable for block
    # scalars.
    if inline_breaks_spaces:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Mixed spaces and breaks, as well as special character are only
    # allowed for double quoted scalars.
    if mixed_breaks_spaces or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # We don't emit multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
822 
823  # Writers.
824 
def flush_stream(self):
    """Flush the output stream if it offers a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
828 
830  # Write BOM if needed.
831  if self.encoding and self.encoding.startswith('utf-16'):
832  self.stream.write(u'\xFF\xFE'.encode(self.encoding))
833 
def write_stream_end(self):
    """Finish the stream; the only action taken is flushing the output."""
    self.flush_stream()
836 
def write_indicator(self, indicator, need_whitespace,
                    whitespace=False, indention=False):
    """Write `indicator`, separated by a space when one is required.

    A space is inserted only if `need_whitespace` is set and the last
    written character was not whitespace.  Afterwards the whitespace
    flag is set to `whitespace`, and the indention flag stays set only
    if it was set before and `indention` is true.
    """
    if need_whitespace and not self.whitespace:
        text = u' '+indicator
    else:
        text = indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(text)
    if self.encoding:
        text = text.encode(self.encoding)
    self.stream.write(text)
849 
def write_indent(self):
    """Move the output position to the current indent column.

    A line break is written first when the line already holds something
    other than indention, when the column is past the indent, or when
    the column equals the indent but the last character was not
    whitespace.  Spaces are then written up to the indent column.
    """
    target = self.indent or 0
    needs_break = (not self.indention or self.column > target
                   or (self.column == target and not self.whitespace))
    if needs_break:
        self.write_line_break()
    if self.column >= target:
        return
    self.whitespace = True
    padding = u' '*(target-self.column)
    self.column = target
    if self.encoding:
        padding = padding.encode(self.encoding)
    self.stream.write(padding)
862 
def write_line_break(self, data=None):
    """Write a line break and reset the position bookkeeping.

    data -- the break to write; defaults to `self.best_line_break`.
    """
    line_break = self.best_line_break if data is None else data
    self.whitespace = self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        line_break = line_break.encode(self.encoding)
    self.stream.write(line_break)
873 
def write_version_directive(self, version_text):
    """Write a ``%YAML <version>`` directive followed by a line break."""
    directive = u'%%YAML %s' % version_text
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
880 
def write_tag_directive(self, handle_text, prefix_text):
    """Write a ``%TAG <handle> <prefix>`` directive and a line break."""
    directive = u'%%TAG %s %s' % (handle_text, prefix_text)
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
887 
888  # Scalar streams.
889 
def write_single_quoted(self, text, split=True):
    """Write `text` as a single-quoted scalar.

    The text is scanned once and flushed in runs of spaces, runs of
    line breaks and runs of other characters; `start` marks the first
    character not yet written and `end` the scan position.  Each
    embedded single quote is written doubled.

    split -- when true, a lone space between words may be replaced by a
             line fold once the column is past `best_width`.
    """
    self.write_indicator(u'\'', True)
    spaces = False      # the run being scanned consists of spaces
    breaks = False      # the run being scanned consists of line breaks
    start = end = 0
    # One extra iteration (ch is None) flushes the final run.
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch is None or ch != u' ':
                # A single inner space past the preferred width becomes
                # a fold instead of being written.
                if start+1 == end and self.column > self.best_width and split \
                        and start != 0 and end != len(text):
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # A run starting with '\n' gets one additional break
                # before the breaks themselves are written.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
        if ch == u'\'':
            # Escape the quote by doubling it and skip past it.
            data = u'\'\''
            self.column += 2
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            start = end + 1
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
    self.write_indicator(u'\'', False)
943 
# Maps a character to the letter written after a backslash when that
# character is escaped in a double-quoted scalar (see write_double_quoted).
ESCAPE_REPLACEMENTS = {
    u'\0':      u'0',
    u'\x07':    u'a',
    u'\x08':    u'b',
    u'\x09':    u't',
    u'\x0A':    u'n',
    u'\x0B':    u'v',
    u'\x0C':    u'f',
    u'\x0D':    u'r',
    u'\x1B':    u'e',
    u'\"':      u'\"',
    u'\\':      u'\\',
    u'\x85':    u'N',
    u'\xA0':    u'_',
    u'\u2028':  u'L',
    u'\u2029':  u'P',
}
961 
def write_double_quoted(self, text, split=True):
    """Write `text` as a double-quoted scalar, escaping where needed.

    Printable ASCII (and, when `allow_unicode` is set, the accepted
    non-ASCII ranges) is copied through in runs; `start` marks the first
    character not yet written and `end` the scan position.  Every other
    character is written as a backslash escape: a short form from
    ESCAPE_REPLACEMENTS when one exists, else \\xXX, \\uXXXX or
    \\UXXXXXXXX.

    split -- when true, a line that would run past `best_width` is
             broken with a trailing backslash.
    """
    self.write_indicator(u'"', True)
    start = end = 0
    # One extra iteration (ch is None) flushes the final run.
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                or not (u'\x20' <= ch <= u'\x7E'
                    or (self.allow_unicode
                        and (u'\xA0' <= ch <= u'\uD7FF'
                            or u'\uE000' <= ch <= u'\uFFFD'))):
            # Flush the pending run of directly writable characters.
            if start < end:
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
            if ch is not None:
                if ch in self.ESCAPE_REPLACEMENTS:
                    data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= u'\xFF':
                    data = u'\\x%02X' % ord(ch)
                elif ch <= u'\uFFFF':
                    data = u'\\u%04X' % ord(ch)
                else:
                    data = u'\\U%08X' % ord(ch)
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end+1
        # Break the line at a space, or right after an escape, once the
        # pending text would pass the preferred width (never at the very
        # first or the last character).
        if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \
                and self.column+(end-start) > self.best_width and split:
            data = text[start:end]+u'\\'
            if start < end:
                start = end
            self.column += len(data)
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            # A continuation line starting with a space gets a leading
            # backslash written before that space.
            if text[start] == u' ':
                data = u'\\'
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
        end += 1
    self.write_indicator(u'"', False)
1015 
def determine_chomp(self, text):
    """Return the chomping indicator for a block scalar holding `text`.

    u'-' when the text does not end with a line break, u'' when it ends
    with exactly one, and u'+' when its last two characters are both
    line breaks.
    """
    line_breaks = u'\n\x85\u2028\u2029'
    tail = text[-2:]
    # Pad short text so both positions below can always be inspected.
    while len(tail) < 2:
        tail = u' '+tail
    if tail[-1] not in line_breaks:
        return u'-'
    if tail[-2] in line_breaks:
        return u'+'
    return u''
1027 
def write_folded(self, text):
    """Write `text` to the stream as a folded (`>`) block scalar.

    The header is `>` plus the indicator from `self.determine_chomp`.
    The text is then scanned as alternating runs of line breaks, spaces
    and other characters. A single space is replaced by a fresh indent
    (a fold) when the current column is already past `self.best_width`.
    """
    chomp = self.determine_chomp(text)
    self.write_indicator(u'>'+chomp, True)
    self.write_indent()
    leading_space = False
    # `spaces` / `breaks` describe the previous character, i.e. the kind
    # of run that text[start:end] currently holds.
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch stays None on the extra final pass, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with '\n'
                # and sits between two lines that do not begin with a
                # space.
                if not leading_space and ch is not None and ch != u' ' \
                        and text[start] == u'\n':
                    self.write_line_break()
                leading_space = (ch == u' ')
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width:
                    # Fold: the single space becomes a new indented line.
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                # Count the word towards the column; without this the
                # best_width test above only ever sees the spaces and the
                # text is never folded.
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
            spaces = (ch == u' ')
        end += 1
1078 
def write_literal(self, text):
    """Write `text` to the stream as a literal (`|`) block scalar.

    The header is `|` plus the indicator from `self.determine_chomp`.
    Each run of non-break characters is written unchanged, and each
    line-break character is passed through `self.write_line_break`
    ('\\n' as the default break, any other break character explicitly).
    """
    chomp = self.determine_chomp(text)
    self.write_indicator(u'|'+chomp, True)
    self.write_indent()
    # `breaks` is True while text[start:end] is a run of line breaks.
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch stays None on the extra final pass, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                # More content follows, so indent the next line.
                if ch is not None:
                    self.write_indent()
                start = end
        else:
            if ch is None or ch in u'\n\x85\u2028\u2029':
                data = text[start:end]
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
1111 
def write_plain(self, text, split=True):
    """Write `text` to the stream as a plain (unquoted) scalar.

    Nothing is written for empty `text`. A separating space is written
    first unless `self.whitespace` is already set. The text is then
    scanned as alternating runs of spaces, line breaks and other
    characters. When `split` is true, a single space is replaced by a
    fresh indent once the column is past `self.best_width`.
    """
    if not text:
        return
    if not self.whitespace:
        data = u' '
        self.column += len(data)
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)
    # Scalar content follows, so the output no longer ends in whitespace
    # or indentation. (This used to assign a misspelled `writespace`
    # attribute, leaving `whitespace` unchanged.)
    self.whitespace = False
    self.indention = False
    # `spaces` / `breaks` describe the previous character, i.e. the kind
    # of run that text[start:end] currently holds.
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch stays None on the extra final pass, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width and split:
                    # Fold: the single space becomes a new indented line.
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # NOTE(review): if `text` ends with a line break, ch is None
            # here and this membership test raises TypeError; presumably
            # callers never pass such text as a plain scalar -- confirm.
            if ch not in u'\n\x85\u2028\u2029':
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
1168 
def expect_first_flow_mapping_key
Definition: emitter.py:310
def expect_first_document_start
Definition: emitter.py:174
def expect_first_block_sequence_item
Definition: emitter.py:367
def expect_block_mapping_key
Definition: emitter.py:389
def expect_flow_mapping_key
Definition: emitter.py:327
Fstring::size_type len(Fstring const &s)
Length.
Definition: Fstring.hh:2207
def expect_flow_mapping_value
Definition: emitter.py:353
def expect_flow_mapping_simple_value
Definition: emitter.py:348
def expect_block_mapping_value
Definition: emitter.py:408
def expect_flow_sequence_item
Definition: emitter.py:286
def write_version_directive
Definition: emitter.py:874
def expect_block_mapping_simple_value
Definition: emitter.py:403
def expect_block_sequence_item
Definition: emitter.py:370
def expect_first_block_mapping_key
Definition: emitter.py:386
def expect_first_flow_sequence_item
Definition: emitter.py:274