Initial commit.

This commit is contained in:
yan
2011-11-17 15:45:33 -06:00
commit 882015bc6d
1819 changed files with 111625 additions and 0 deletions

View File

@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
# == Base class for parsers
#
# This class serves as base class for parsers. It provides common methods that can/should be
# used by all parsers, especially by those using StringScanner for parsing.
#
class Base
# Set up the state shared by all parsers.
#
# +doc+ is the Kramdown document the parser works for; it is stored in <tt>@doc</tt>. The type
# of the text elements created by #add_text defaults to <tt>:text</tt>.
def initialize(doc)
  @doc, @text_type = doc, :text
end
private_class_method(:new, :allocate)
# Parse the +source+ string into an element tree for the Kramdown document +doc+.
#
# A new instance of the class on which this method is invoked is created for +doc+ and the
# result of its #parse method (which every subclass has to implement) is returned.
def self.parse(source, doc)
  parser = new(doc)
  parser.parse(source)
end
# Append the warning +text+ to the list of warnings of the Kramdown document.
def warning(text)
  #TODO: add position information
  @doc.warnings.push(text)
end
# Return a copy of +source+ that is usable by the parser: all line endings are converted to a
# single line feed and the result ends with a line feed.
def adapt_source(source)
  normalized = source.gsub(/\r\n?/, "\n")
  normalized.chomp!
  normalized + "\n"
end
# Add +text+ to the +tree+.
#
# If the last child of +tree+ is an element of the given +type+, the text is appended to the
# value of that element. Otherwise a new element of +type+ is created for the text, unless
# the text is empty.
def add_text(text, tree = @tree, type = @text_type)
  last = tree.children.last
  if last && last.type == type
    last.value << text
  elsif !text.empty?
    tree.children << Element.new(type, text)
  end
end
# Extract the part of the string backing the StringScanner +strscan+ that is specified by the
# +range+.
#
# The +range+ is interpreted in bytes (StringScanner positions are byte offsets), so this
# method also works correctly for multi-byte strings under Ruby 1.9 and newer. The returned
# string has the same encoding as the source string.
#
# The source string itself is never modified. Temporarily re-tagging its encoding, as done
# previously, fails for frozen strings and changes an object that is shared with the scanner.
def extract_string(range, strscan)
  source = strscan.string
  if source.respond_to?(:byteslice)
    source.byteslice(range)
  elsif source.respond_to?(:force_encoding)
    # Ruby 1.9 versions without String#byteslice: slice a binary copy instead of the original.
    source.dup.force_encoding('ASCII-8BIT')[range].force_encoding(source.encoding)
  else
    # Ruby 1.8: strings are plain byte sequences.
    source[range]
  end
end
end
end
end

View File

@@ -0,0 +1,482 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'rexml/parsers/baseparser'
require 'strscan'
module Kramdown
module Parser
# Used for parsing a HTML document.
class Html < Base
# Contains all constants that are used when parsing.
module Constants
#:stopdoc:
# The following regexps are based on the ones used by REXML, with some slight modifications.
# A DOCTYPE declaration, matched non-greedily up to the first '>' (may span lines).
HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/m
# An HTML/XML comment; group 1 is the comment text.
HTML_COMMENT_RE = /<!--(.*?)-->/m
# A processing instruction; group 1 is its content.
HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
# A single quoted attribute: group 1 is the name, group 2 the quote character and group 3 the
# value.
HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
# A start tag: group 1 is the tag name, group 2 the raw attribute string and group 4 is set
# when the tag is self-closing ('/>').
HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
# An end tag; group 1 is the tag name.
HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/m
# An entity: group 1 is set for named entities, group 2 for decimal and group 3 for
# hexadecimal character references.
HTML_ENTITY_RE = /&([\w:][\-\w\d\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
# The three lists below are the sources for the HTML_PARSE_AS lookup table.
HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
map noscript object ol table tbody thead tfoot tr td ul}
HTML_PARSE_AS_SPAN = %w{a abbr acronym address b bdo big cite caption del dfn dt em
h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
rp rt rtc ruby samp select small span strong sub sup th tt var}
HTML_PARSE_AS_RAW = %w{script math option textarea pre code}
# Maps a tag name to :block, :span or :raw. Names that are in none of the lists above resolve
# to :raw (and are stored in the hash on first access).
HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
# Some HTML elements like script belong to both categories (i.e. are valid in block and
# span HTML) and don't appear therefore!
HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
ins kbd label option q rb rbc rp rt rtc ruby samp select small span
strong sub sup textarea tt var}
HTML_BLOCK_ELEMENTS = %w{address article aside applet body button blockquote caption col colgroup dd div dl dt fieldset
figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend listing menu
li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul}
# Elements that cannot have content; start tags of these elements are auto-closed by the
# parser.
HTML_ELEMENTS_WITHOUT_BODY = %w{area base br col command embed hr img input keygen link meta param source track wbr}
end
# Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
# functionality. The only thing that must be provided by the class are instance variable
# <tt>@stack</tt> for storing needed state and <tt>@src</tt> (instance of StringScanner) for
# the actual parsing.
module Parser
include Constants
# Process the HTML start tag that has already been scanned/checked (i.e. the last match of
# <tt>@src</tt> is a match of HTML_TAG_RE).
#
# A new :html_element element with the attributes of the tag is added to the current tree.
# Afterwards the element and a flag telling whether the element is already closed are yielded
# to the caller for further processing. Elements that cannot have a body are auto-closed
# (with a warning) and the content of a 'script' element is consumed before yielding.
def handle_html_start_tag
  tag_name = @src[1]
  closed = !@src[4].nil?

  attributes = {}
  @src[2].scan(HTML_ATTRIBUTE_RE) {|key, _quote, value| attributes[key] = value}

  el = Element.new(:html_element, tag_name, :attr => attributes, :category => :block)
  @tree.children << el

  if !closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
    warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
    closed = true
  end

  # The body of a script tag is read right here, so the element is always reported as closed.
  is_script = (tag_name == 'script')
  handle_html_script_tag if is_script
  yield(el, is_script || closed)
end
# Read the content of a 'script' element.
#
# Everything up to the closing script tag is added as :raw text to the last element of the
# current tree and the closing tag is consumed. If there is no closing tag, the rest of the
# source is used as content and a warning is issued.
def handle_html_script_tag
  start_pos = @src.pos
  script_el = @tree.children.last
  if @src.scan_until(/(?=<\/script\s*>)/m)
    add_text(extract_string(start_pos...@src.pos, @src), script_el, :raw)
    @src.scan(HTML_TAG_CLOSE_RE)
  else
    add_text(@src.scan(/.*/m), script_el, :raw)
    warning("Found no end tag for 'script' - auto-closing it")
  end
end
HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/
# Parse raw HTML from the current source position, storing the found elements in +el+.
# Parsing continues until one of the following criteria is fulfilled:
#
# - The end of the document is reached.
# - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
# element).
#
# When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
# providing the block given to this method.
def parse_raw_html(el, &block)
# Make +el+ the current tree; the previous tree is restored from the stack at the end.
@stack.push(@tree)
@tree = el
done = false
while !@src.eos? && !done
if result = @src.scan_until(HTML_RAW_START)
# Everything in front of the next markup candidate is plain text.
add_text(result, @tree, :text)
if result = @src.scan(HTML_COMMENT_RE)
@tree.children << Element.new(:xml_comment, result, :category => :block, :parent_is_raw => true)
elsif result = @src.scan(HTML_INSTRUCTION_RE)
@tree.children << Element.new(:xml_pi, result, :category => :block, :parent_is_raw => true)
elsif @src.scan(HTML_TAG_RE)
handle_html_start_tag(&block)
elsif @src.scan(HTML_TAG_CLOSE_RE)
# Only the end tag matching the current element ends the loop; other end tags are dropped.
if @tree.value == @src[1]
done = true
else
warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
end
else
# HTML_RAW_START matched but none of the constructs above did: take one character as text
# so that the scanner always advances.
add_text(@src.scan(/./), @tree, :text)
end
else
# No further markup candidate: the rest of the source is text.
result = @src.scan(/.*/m)
add_text(result, @tree, :text)
warning("Found no end tag for '#{@tree.value}' - auto-closing it") if @tree.type == :html_element
done = true
end
end
@tree = @stack.pop
end
end
# Converts HTML elements to native elements if possible.
class ElementConverter
include Constants
include ::Kramdown::Utils::Entities
# Tag names for which #process removes all direct text children.
REMOVE_TEXT_CHILDREN = %w{html head hgroup ol ul dl table colgroup tbody thead tfoot tr select optgroup}
# Tag names for which #process wraps runs of non-block children in transparent paragraphs.
WRAP_TEXT_CHILDREN = %w{body section nav article aside header footer address div li dd blockquote figure
figcaption fieldset form}
# Tag names for which #process removes whitespace-only text children (see
# #remove_whitespace_children for the exact conditions).
REMOVE_WHITESPACE_CHILDREN = %w{body section nav article aside header footer address
div li dd blockquote figure figcaption td th fieldset form}
# Tag names for which #process strips leading/trailing whitespace of the first/last text child.
STRIP_WHITESPACE = %w{address article aside blockquote body caption dd div dl dt fieldset figcaption form footer
header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
# Tag names that #process converts by just changing the element type to the tag name (when no
# convert_NAME method exists for the tag).
SIMPLE_ELEMENTS = %w{em strong blockquote hr br a img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
# Create a converter for the Kramdown document +doc+.
def initialize(doc)
@doc = doc
end
# Convert the element +el+ and its children.
#
# - +do_conversion+: if +false+, HTML elements are kept as HTML elements instead of being
#   converted to native elements.
# - +preserve_text+: passed on to #process_text; if +true+, whitespace in text is not compressed.
# - +parent+: the parent element of +el+ or +nil+; only used for comments, processing
#   instructions and doctypes to decide whether they are span or block level elements.
#
# Elements other than :html_element, :xml_comment, :xml_pi and :html_doctype are left alone.
def process(el, do_conversion = true, preserve_text = false, parent = nil)
case el.type
when :xml_comment, :xml_pi, :html_doctype
# Derive a tag name for the parent; the category depends on whether this name is one of the
# tags parsed as span.
ptype = if parent.nil?
'div'
else
case parent.type
when :html_element then parent.value
when :code_span then 'code'
when :code_block then 'pre'
when :header then 'h1'
else parent.type.to_s
end
end
el.options = {:category => HTML_PARSE_AS_SPAN.include?(ptype) ? :span : :block}
return
when :html_element
else return
end
# +type+ keeps the tag name because the conversion below may reset el.value.
type = el.value
remove_text_children(el) if REMOVE_TEXT_CHILDREN.include?(type)
mname = "convert_#{el.value}"
if do_conversion && self.class.method_defined?(mname)
send(mname, el)
elsif do_conversion && SIMPLE_ELEMENTS.include?(type)
set_basics(el, type.intern, HTML_SPAN_ELEMENTS.include?(type) ? :span : :block)
process_children(el, do_conversion, preserve_text)
else
process_html_element(el, do_conversion, preserve_text)
end
# Whitespace handling is done after the children have been processed.
strip_whitespace(el) if STRIP_WHITESPACE.include?(type)
remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(type)
wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(type)
end
# Process all children of +el+ in place.
#
# Text children are replaced by the elements returned by #process_text, all other children
# are converted via #process (with +el+ as parent) and kept.
def process_children(el, do_conversion = true, preserve_text = false)
  el.children.map! do |child|
    next process_text(child.value, preserve_text) if child.type == :text
    process(child, do_conversion, preserve_text, el)
    child
  end
  el.children.flatten!
end
# Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
# entities in entity elements.
#
# Returns an array of elements. Note that +raw+ itself is modified when whitespace is
# compressed.
def process_text(raw, preserve = false)
raw.gsub!(/\s+/, ' ') unless preserve
src = StringScanner.new(raw)
result = []
while !src.eos?
if tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/)
# +tmp+ is the text in front of the entity; a text element is added even if it is empty.
result << Element.new(:text, tmp)
src.scan(HTML_ENTITY_RE)
# Entity name (String) for named entities, code point (Integer) for numeric ones.
val = src[1] || (src[2] && src[2].to_i) || src[3].hex
result << if %w{lsquo rsquo ldquo rdquo}.include?(val)
Element.new(:smart_quote, val.intern)
elsif %w{mdash ndash hellip laquo raquo}.include?(val)
Element.new(:typographic_sym, val.intern)
else
Element.new(:entity, entity(val))
end
else
# No further entity: the rest is text.
result << Element.new(:text, src.scan(/.*/m))
end
end
result
end
# Keep +el+ as HTML element: its options are replaced by the category (span or block), the
# parse type of the tag and the existing attributes. Afterwards the children are processed.
def process_html_element(el, do_conversion = true, preserve_text = false)
  tag = el.value
  category = HTML_SPAN_ELEMENTS.include?(tag) ? :span : :block
  attributes = el.options[:attr]
  el.options = {:category => category, :parse_type => HTML_PARSE_AS[tag], :attr => attributes}
  process_children(el, do_conversion, preserve_text)
end
# Remove all direct children of +el+ that are text elements.
def remove_text_children(el)
  el.children.delete_if do |child|
    child.type == :text
  end
end
SPAN_ELEMENTS = [:em, :strong, :br, :a, :img, :codespan, :entity, :smart_quote, :typographic_sym, :math]
# Wrap the non-block children of +el+ in paragraphs.
#
# Each run of consecutive children that are text elements or whose category is not :block is
# moved into a new :p element marked as transparent. Block level children are kept as they
# are.
def wrap_text_children(el)
  wrapped = []
  paragraph = nil
  el.children.each do |child|
    if child.options[:category] != :block || child.type == :text
      unless paragraph
        paragraph = Element.new(:p, nil, :transparent => true)
        wrapped << paragraph
      end
      paragraph.children << child
    else
      wrapped << child
      paragraph = nil
    end
  end
  el.children = wrapped
end
# Strip leading whitespace from the first child and trailing whitespace from the last child
# of +el+ if the respective child is a text element. The text values are modified in place.
def strip_whitespace(el)
  return if el.children.empty?
  first, last = el.children.first, el.children.last
  first.value.lstrip! if first.type == :text
  last.value.rstrip! if last.type == :text
end
# Remove whitespace-only text children of +el+ that are the first child, the last child or
# located between two children whose category is :block.
def remove_whitespace_children(el)
# +i+ is the index of the child currently yielded by delete_if.
i = -1
el.children.delete_if do |c|
i += 1
# NOTE(review): el.children is read here while delete_if is modifying it, so the neighbour
# lookup and the length check depend on how Array#delete_if behaves during iteration -
# confirm this for the targeted Ruby versions.
c.type == :text && c.value.strip.empty? &&
(i == 0 || i == el.children.length - 1 || (el.children[i-1].options[:category] == :block &&
el.children[i+1].options[:category] == :block))
end
end
# Turn +el+ into an element of the given +type+ and +category+.
#
# The options of the element are replaced: only the existing attributes are kept, +opts+ is
# merged in. The value of the element is reset to +nil+.
def set_basics(el, type, category, opts = {})
  base_options = {:category => category, :attr => el.options[:attr]}
  el.type = type
  el.options = base_options.merge(opts)
  el.value = nil
end
# Append the values of all text elements found in +el+ and its descendants (in document
# order) to the string +raw+.
def extract_text(el, raw)
  if el.type == :text
    raw << el.value.to_s
  end
  el.children.each do |child|
    extract_text(child, raw)
  end
end
# Convert a heading element (h1 to h6) into a :header element.
#
# The level is taken from the digit in the tag name, the concatenated text of the heading is
# stored in the <tt>:raw_text</tt> option and the children are processed afterwards.
def convert_h1(el)
  level = el.value[1..1].to_i
  set_basics(el, :header, :block, :level => level)
  raw_text = ''
  el.options[:raw_text] = raw_text
  extract_text(el, raw_text)
  process_children(el)
end
%w{h2 h3 h4 h5 h6}.each do |i|
alias_method("convert_#{i}".to_sym, :convert_h1)
end
# Convert a 'code' or 'pre' element (convert_pre is an alias) into a :codespan or :codeblock
# element if its whole content can be represented as a single string. Otherwise the element
# is kept as HTML element.
def convert_code(el)
raw = ''
extract_text(el, raw)
# Whitespace is preserved (second argument) when splitting the text into elements.
result = process_text(raw, true)
begin
# Try to join all parts into one string, replacing entities and symbols by their characters.
str = result.inject('') do |mem, c|
if c.type == :text
mem << c.value
elsif c.type == :entity
if RUBY_VERSION >= '1.9'
mem << c.value.char.encode(@doc.parse_infos[:encoding])
elsif [60, 62, 34, 38].include?(c.value.code_point)
mem << c.value.code_point.chr
end
elsif c.type == :smart_quote || c.type == :typographic_sym
mem << entity(c.value.to_s).char.encode(@doc.parse_infos[:encoding])
else
raise "Bug - please report"
end
end
result.clear
result << Element.new(:text, str)
rescue
# Any error raised above is swallowed: +result+ then still holds the elements returned by
# process_text and the check below falls back to an HTML element.
end
if result.length > 1 || result.first.type != :text
process_html_element(el, false, true)
else
if el.value == 'code'
set_basics(el, :codespan, :span)
else
set_basics(el, :codeblock, :block)
end
el.value = result.first.value
end
end
alias :convert_pre :convert_code
# Convert a 'table' element into a native :table element if possible.
#
# Tables that are not simple (see #is_simple_table?) and tables without any children are kept
# as HTML elements. The latter is necessary because an empty table passes the simple-table
# check but offers no row from which the structure of the native table could be derived
# (accessing the first child used to raise a NoMethodError for <tt><table></table></tt>).
#
# For converted tables the <tt>:alignment</tt> option is set to one <tt>:default</tt> entry
# per cell of the first row and rows that are direct children of the table are wrapped in a
# :tbody element.
def convert_table(el)
  if el.children.empty? || !is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  process_children(el)
  set_basics(el, :table, :block)
  el.options[:alignment] = []
  # Depth-first search for the first row; only this row determines the number of columns.
  calc_alignment = lambda do |c|
    if c.type == :tr && el.options[:alignment].empty?
      el.options[:alignment] = [:default] * c.children.length
    else
      c.children.each {|cc| calc_alignment.call(cc)}
    end
  end
  calc_alignment.call(el)
  if el.children.first.type == :tr
    tbody = Element.new(:tbody, nil, :category => :block)
    tbody.children = el.children
    el.children = [tbody]
  end
end
# Return +true+ if the table element +el+ is simple enough to be converted into a native
# table: no cell contains a block level HTML element and the table consists either directly
# of rows with 'td' cells or of thead/tbody/tfoot sections (with at least one tbody).
def is_simple_table?(el)
# True if no descendant of +c+ is a non-text element whose value is a block level tag name.
only_phrasing_content = lambda do |c|
c.children.all? do |cc|
(cc.type == :text || !HTML_BLOCK_ELEMENTS.include?(cc.value)) && only_phrasing_content.call(cc)
end
end
# A Proc (not a lambda) is used on purpose: the +return+ inside leaves is_simple_table?
# itself as soon as a cell with block level content is found.
check_cells = Proc.new do |c|
if c.value == 'th' || c.value == 'td'
return false if !only_phrasing_content.call(c)
else
c.children.each {|cc| check_cells.call(cc)}
end
end
check_cells.call(el)
# True if all children of +t+ are rows (or text) that only contain cells of +type+ (or text).
check_rows = lambda do |t, type|
t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}}
end
check_rows.call(el, 'td') ||
(el.children.all? do |t|
t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
end && el.children.any? {|t| t.value == 'tbody'})
end
# Convert a 'div' or 'span' element (convert_span is an alias): elements recognized by
# #is_math_tag? become math elements, all others are kept as HTML elements.
def convert_div(el)
  is_math_tag?(el) ? handle_math_tag(el) : process_html_element(el)
end
alias :convert_span :convert_div
# Return a true value if +el+ has attributes, its class attribute contains the word 'math'
# and its only child is a text element.
def is_math_tag?(el)
  attrs = el.options[:attr]
  return attrs unless attrs
  return nil unless attrs['class'].to_s =~ /\bmath\b/
  kids = el.children
  kids.size == 1 && kids.first.type == :text
end
# Convert the element +el+ (for which #is_math_tag? holds) into a :math element.
#
# A 'div' becomes a block level, everything else a span level math element. The text of the
# single child becomes the value of the element, with the entities for <tt>&</tt>,
# <tt>"</tt>, <tt>></tt> and <tt><</tt> replaced by their characters.
#
# The class name 'math' is removed from the class attribute; the attribute is deleted if no
# other class name remains. The class attribute is handled as whitespace separated list of
# names so that only the name 'math' itself is removed and names that merely contain 'math'
# (e.g. 'mathjax') are left intact.
def handle_math_tag(el)
  set_basics(el, :math, (el.value == 'div' ? :block : :span))
  el.value = el.children.shift.value

  attr = el.options[:attr]
  remaining = attr['class'].to_s.split(/\s+/).reject {|name| name.empty? || name == 'math'}
  if remaining.empty?
    attr.delete('class')
  else
    attr['class'] = remaining.join(' ')
  end

  unescaped = {'&amp;' => '&', '&quot;' => '"', '&gt;' => '>', '&lt;' => '<'}
  el.value.gsub!(/&(amp|quot|gt|lt);/) {|m| unescaped[m]}
end
end
include Parser
# Parse +source+ as HTML document and return the created +tree+.
#
# Processing instructions, doctypes and comments at the start of the document are added
# directly to the root element. The rest is parsed as raw HTML and then converted to native
# elements where possible.
def parse(source)
  @stack = []
  @tree = Element.new(:root)
  @src = StringScanner.new(adapt_source(source))

  # The constructs allowed in front of the first element, in the order they are tried.
  prolog = [
    [:xml_pi, /\s*#{HTML_INSTRUCTION_RE}/],
    [:html_doctype, /\s*#{HTML_DOCTYPE_RE}/],
    [:xml_comment, /\s*#{HTML_COMMENT_RE}/]
  ]
  while (found = prolog.find {|_type, re| @src.scan(re)})
    @tree.children << Element.new(found.first, @src.matched.strip, :category => :block)
  end

  # Elements that are not closed yet are parsed recursively.
  tag_handler = lambda do |child, closed|
    parse_raw_html(child, &tag_handler) if !closed
  end
  parse_raw_html(@tree, &tag_handler)

  converter = ElementConverter.new(@doc)
  @tree.children.each {|child| converter.process(child)}
  converter.remove_whitespace_children(@tree)
  @tree
end
end
end
end

View File

@@ -0,0 +1,303 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'strscan'
require 'stringio'
#TODO: use [[:alpha:]] in all regexp to allow parsing of international values in 1.9.1
#NOTE: use @src.pre_match only before other check/match?/... operations, otherwise the content is changed
module Kramdown
module Parser
# Used for parsing a document in kramdown format.
#
# If you want to extend the functionality of the parser, you need to do the following:
#
# * Create a new subclass
# * add the needed parser methods
# * modify the @block_parsers and @span_parsers variables and add the names of your parser
# methods
#
# Here is a small example for an extended parser class that parses ERB style tags as raw text if
# they are used as span level elements (an equivalent block level parser should probably also be
# made to handle the block case):
#
# require 'kramdown/parser/kramdown'
#
# class Kramdown::Parser::ERBKramdown < Kramdown::Parser::Kramdown
#
# def initialize(doc)
# super(doc)
# @span_parsers.unshift(:erb_tags)
# end
#
# ERB_TAGS_START = /<%.*?%>/
#
# def parse_erb_tags
# @src.pos += @src.matched_size
# @tree.children << Element.new(:raw, @src.matched)
# end
# define_parser(:erb_tags, ERB_TAGS_START, '<%')
#
# end
#
# The new parser can be used like this:
#
# require 'kramdown/document'
# # require the file with the above parser class
#
# Kramdown::Document.new(input_text, :input => 'ERBKramdown').to_html
#
class Kramdown < Base
include ::Kramdown
attr_reader :tree
attr_reader :doc
attr_reader :options
# Create a new Kramdown parser object for the Kramdown::Document +doc+.
#
# The parse information of the document for attribute list definitions, link definitions,
# abbreviation definitions and footnotes is reset and the lists of block and span parsers
# that are used (in this order) are set up.
def initialize(doc)
  super(doc)

  @src = @tree = @block_ial = nil
  @stack = []
  @text_type = :raw_text

  [:ald, :link_defs, :abbrev_defs, :footnotes].each {|key| @doc.parse_infos[key] = {}}

  @block_parsers = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :table, :atx_header,
                    :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
                    :footnote_definition, :abbrev_definition, :ald, :block_math,
                    :block_extension, :block_ial, :eob_marker, :paragraph]
  @span_parsers = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link, :smart_quotes, :inline_math,
                   :span_extension, :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
end
private_class_method(:new, :allocate)
# Parse the +source+ string and return the created element tree.
#
# After the block level parsing, the span level elements are parsed, the abbreviations are
# replaced and the content of all footnotes is updated as well.
def parse(source)
  configure_parser
  root = Element.new(:root)
  parse_blocks(root, adapt_source(source))
  update_tree(root)
  replace_abbreviations(root)
  @doc.parse_infos[:footnotes].each do |_name, footnote|
    update_tree(footnote[:content])
  end
  root
end
#######
protected
#######
# Prepare the object for parsing: look up the parser data for all block and span parsers that
# should be used and create the regexps for finding the start of span level elements.
#
# Raises Kramdown::Error if one of the parser names is unknown.
def configure_parser
  @parsers = {}
  (@block_parsers + @span_parsers).each do |name|
    raise Kramdown::Error, "Unknown parser: #{name}" unless self.class.has_parser?(name)
    @parsers[name] = self.class.parser(name)
  end
  @span_start, @span_start_re = span_parser_regexps
end
# Create the regexps needed for the given span +parsers+.
#
# Returns an array with two regexps: the first one is the alternation of the span start
# strings of all parsers, the second one is a lookahead for the first one.
def span_parser_regexps(parsers = @span_parsers)
  alternatives = parsers.map {|name| @parsers[name].span_start}
  span_start = /#{alternatives.join('|')}/
  [span_start, /(?=#{span_start})/]
end
# Parse all block level elements in +text+ into the element +el+.
#
# If +text+ is +nil+, parsing continues with the current source. The current tree and source
# are saved on the stack and restored afterwards. Returns the result of the catch block, i.e.
# the value thrown with <tt>:stop_block_parsing</tt> if a parser method throws it.
def parse_blocks(el, text = nil)
@stack.push([@tree, @src])
@tree, @src = el, (text.nil? ? @src : StringScanner.new(text))
status = catch(:stop_block_parsing) do
while !@src.eos?
# Remember whether a block IAL was pending before this round.
block_ial_set = @block_ial
# The first parser whose start regexp matches and whose method returns a true value wins.
@block_parsers.any? do |name|
if @src.check(@parsers[name].start_re)
send(@parsers[name].method)
else
false
end
end || begin
# Fallback so that the scanner always advances: take the line as text.
warning('Warning: this should not occur - no block parser handled the line')
add_text(@src.scan(/.*\n/))
end
# A block IAL that was already pending before this round is discarded now.
@block_ial = nil if block_ial_set
end
end
@tree, @src = *@stack.pop
status
end
# Update the tree below +element+:
#
# - <tt>:raw_text</tt> children are parsed with the span level parser and replaced by the
#   resulting elements (this resets +@tree+, +@src+, +@text_type+ and the +@stack+),
# - <tt>:eob</tt> children are removed and
# - all other children are updated recursively, with their attributes being updated from
#   their IAL if they have one.
def update_tree(element)
  element.children.map! do |child|
    case child.type
    when :raw_text
      @stack, @tree, @text_type = [], nil, :text
      @src = StringScanner.new(child.value)
      parse_spans(child)
      child.children
    when :eob
      []
    else
      update_tree(child)
      ial = child.options[:ial]
      update_attr_with_ial(child.options[:attr] ||= {}, ial) if ial
      child
    end
  end.flatten!
end
# Parse all span level elements in the source string.
#
# - +el+: the element to which the parsed elements are added.
# - +stop_re+: if given, parsing stops when this regexp matches at the current position. If a
#   block is given, it is called at such a position and parsing only stops if it returns a
#   true value.
# - +parsers+: the names of the span parsers to use (defaults to all span parsers).
# - +text_type+: the element type used for text while parsing.
#
# Returns whether the stop regexp was found.
def parse_spans(el, stop_re = nil, parsers = nil, text_type = @text_type)
# Nothing is pushed for the outermost call (+@tree+ is +nil+ then); the pop at the end then
# sets +@tree+ and +@text_type+ to +nil+.
@stack.push([@tree, @text_type]) unless @tree.nil?
@tree, @text_type = el, text_type
span_start = @span_start
span_start_re = @span_start_re
span_start, span_start_re = span_parser_regexps(parsers) if parsers
parsers = parsers || @span_parsers
used_re = (stop_re.nil? ? span_start_re : /(?=#{Regexp.union(stop_re, span_start)})/)
stop_re_found = false
while !@src.eos? && !stop_re_found
if result = @src.scan_until(used_re)
# Text in front of the next possible span start (or stop position).
add_text(result)
if stop_re && (stop_re_matched = @src.check(stop_re))
stop_re_found = (block_given? ? yield : true)
end
processed = parsers.any? do |name|
if @src.check(@parsers[name].start_re)
send(@parsers[name].method)
true
else
false
end
end unless stop_re_found
# No parser handled the position: take one character as text so that the scanner advances.
add_text(@src.scan(/./)) if !processed && !stop_re_found
else
# No further span start: the rest is text, but only if no stop regexp was requested.
add_text(@src.scan(/.*/m)) unless stop_re
break
end
end
@tree, @text_type = @stack.pop
stop_re_found
end
# Update the attributes +attr+ with the information from the inline attribute list +ial+ and
# all attribute list definitions (ALDs) referenced by it.
#
# Referenced ALDs are applied first, so the values of the +ial+ itself take precedence. Class
# names are appended to an existing class attribute, all other string keys are overwritten.
#
# +active+ holds the attribute lists that are currently being applied further up in the
# reference chain and is used only for the recursion. An attribute list that references
# itself (directly or via other ALDs) is not followed again, which prevents an endless
# recursion for documents with cyclic ALD references.
def update_attr_with_ial(attr, ial, active = [])
  return if active.any? {|current| current.equal?(ial)}
  active.push(ial)
  begin
    (ial[:refs] || []).each do |ref|
      referenced = @doc.parse_infos[:ald][ref]
      update_attr_with_ial(attr, referenced, active) if referenced
    end
  ensure
    active.pop
  end
  attr['class'] = ((attr['class'] || '') + " #{ial['class']}").lstrip if ial['class']
  ial.each {|k,v| attr[k] = v if k.kind_of?(String) && k != 'class' }
end
# Create a new block level element from the given arguments (see Element.new).
#
# The category of the element defaults to <tt>:block</tt>. If a block IAL precedes the
# element, it is stored in the <tt>:ial</tt> option, except for :blank and :eob elements.
def new_block_el(*args)
  block = Element.new(*args)
  options = block.options
  options[:category] = :block unless options[:category]
  takes_ial = !(block.type == :blank || block.type == :eob)
  options[:ial] = @block_ial if @block_ial && takes_ial
  block
end
# The registry of all defined parsers, indexed by name.
@@parsers = {}

# Holds all the needed data for one block/span level parser.
Data = Struct.new(:name, :start_re, :span_start, :method)

# Add a parser method to the registry.
#
# * +name+ is the name of the parser,
# * +start_re+ is the regexp that has to match at the start of the element and
# * +span_start+ is, for span parsers, a String that can be used in a regexp and which
#   identifies the starting character(s).
#
# The name of the method that does the parsing is derived from the +name+ or can explicitly be
# set by using the +meth_name+ parameter. An error is raised if a parser with the same name
# already exists.
def self.define_parser(name, start_re, span_start = nil, meth_name = "parse_#{name}")
  if @@parsers.key?(name)
    raise "A parser with the name #{name} already exists!"
  end
  @@parsers[name] = Data.new(name, start_re, span_start, meth_name)
end

# Return the Data structure for the parser +name+ (+nil+ if there is no such parser).
def self.parser(name = nil)
  @@parsers[name]
end

# Return +true+ if there is a parser called +name+.
def self.has_parser?(name)
  @@parsers.key?(name)
end
INDENT = /^(?:\t| {4})/
OPT_SPACE = / {0,3}/
require 'kramdown/parser/kramdown/blank_line'
require 'kramdown/parser/kramdown/eob'
require 'kramdown/parser/kramdown/paragraph'
require 'kramdown/parser/kramdown/header'
require 'kramdown/parser/kramdown/blockquote'
require 'kramdown/parser/kramdown/table'
require 'kramdown/parser/kramdown/codeblock'
require 'kramdown/parser/kramdown/horizontal_rule'
require 'kramdown/parser/kramdown/list'
require 'kramdown/parser/kramdown/link'
require 'kramdown/parser/kramdown/attribute_list'
require 'kramdown/parser/kramdown/extension'
require 'kramdown/parser/kramdown/footnote'
require 'kramdown/parser/kramdown/html'
require 'kramdown/parser/kramdown/escaped_chars'
require 'kramdown/parser/kramdown/html_entity'
require 'kramdown/parser/kramdown/line_break'
require 'kramdown/parser/kramdown/typographic_symbol'
require 'kramdown/parser/kramdown/autolink'
require 'kramdown/parser/kramdown/codespan'
require 'kramdown/parser/kramdown/emphasis'
require 'kramdown/parser/kramdown/smart_quotes'
require 'kramdown/parser/kramdown/math'
require 'kramdown/parser/kramdown/abbreviation'
end
end
end

View File

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
ABBREV_DEFINITION_START = /^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/
# Parse the abbreviation definition at the current location.
#
# The definition is stored in the abbreviation definitions of the document; a warning is
# issued if a definition with the same ID already exists (it is overwritten).
def parse_abbrev_definition
  @src.pos += @src.matched_size
  abbrev_id = @src[1]
  abbrev_text = @src[2].strip
  definitions = @doc.parse_infos[:abbrev_defs]
  warning("Duplicate abbreviation ID '#{abbrev_id}' - overwriting") if definitions[abbrev_id]
  definitions[abbrev_id] = abbrev_text
  true
end
define_parser(:abbrev_definition, ABBREV_DEFINITION_START)
# Replace all occurrences of the defined abbreviations in the text elements below +el+ with
# :abbreviation elements.
#
# +regexps+ is only used for the recursion. It is an array of two regexps: the first one
# matches an abbreviation at the current position, the last one finds the position in front
# of the next abbreviation.
#
# The abbreviations are tried longest first and an abbreviation is only taken if it is not
# directly followed by a word character. Otherwise an abbreviation that is the prefix of
# another one (e.g. 'HTML' and 'HTML5') would be extracted from the longer one.
def replace_abbreviations(el, regexps = nil)
  return if @doc.parse_infos[:abbrev_defs].empty?
  if !regexps
    sorted = @doc.parse_infos[:abbrev_defs].keys.sort_by {|k| -k.length}
    union = Regexp.union(*sorted.map {|k| /#{Regexp.escape(k)}/})
    # Both regexps should only match on word boundaries.
    regexps = [/#{union}(?!\w)/, /(?=(?:\W|^)#{union}(?!\w))/]
  end
  el.children.map! do |child|
    if child.type == :text
      result = []
      strscan = StringScanner.new(child.value)
      while temp = strscan.scan_until(regexps.last)
        # Add the non-word character in front of the abbreviation (if any) to the text.
        temp += strscan.scan(/\W|^/)
        abbr = strscan.scan(regexps.first)
        result += [Element.new(:text, temp), Element.new(:abbreviation, abbr)]
      end
      result + [Element.new(:text, strscan.rest)]
    else
      replace_abbreviations(child, regexps)
      child
    end
  end.flatten!
end
end
end
end

View File

@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Parse the string +str+ and add all attributes found in it to the hash +opts+.
#
# References to attribute list definitions are collected under the key <tt>:refs</tt>, class
# names are appended to the value of the key 'class', an ID is stored under the key 'id' and
# for key-value pairs the (unescaped) value is stored under the given key.
def parse_attribute_list(str, opts)
  str.scan(ALD_TYPE_ANY).each do |key, quote, value, id_name, class_name, reference|
    if reference
      opts[:refs] ||= []
      opts[:refs] << reference
    elsif class_name
      current = opts['class'] || ''
      opts['class'] = (current + " #{class_name}").lstrip
    elsif id_name
      opts['id'] = id_name
    else
      # Remove the backslash in front of escaped closing braces and quote characters.
      opts[key] = value.gsub(/\\(\}|#{quote})/, "\\1")
    end
  end
end
# Update the +ial+ with the information from the inline attribute list +opts+.
#
# The references of +opts+ are appended to the references of +ial+ (the <tt>:refs</tt> key of
# +ial+ is always set to an array afterwards), class names are appended to the existing class
# value and all other entries overwrite the entries of +ial+.
#
# The references are added one by one; pushing the whole array (or +nil+ if +opts+ has no
# references) as a single entry would produce an entry that can never be resolved to an
# attribute list definition.
def update_ial_with_ial(ial, opts)
  ial[:refs] ||= []
  ial[:refs].concat(opts[:refs]) if opts[:refs]
  ial['class'] = ((ial['class'] || '') + " #{opts['class']}").lstrip if opts['class']
  opts.each {|k,v| ial[k] = v if k != :refs && k != 'class' }
end
# A character allowed in a name: word character, digit or hyphen.
ALD_ID_CHARS = /[\w\d-]/
# A character allowed in the content of an attribute list: an escaped closing brace or any
# character except a closing brace.
ALD_ANY_CHARS = /\\\}|[^\}]/
# A name: it has to start with a word character or digit.
ALD_ID_NAME = /(?:\w|\d)#{ALD_ID_CHARS}*/
# A key-value pair of the form key="value" or key='value'; the groups are the key, the quote
# character and the value.
ALD_TYPE_KEY_VALUE_PAIR = /(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])*?)\2/
# A class name of the form .name; the group is the name.
ALD_TYPE_CLASS_NAME = /\.(#{ALD_ID_NAME})/
# An ID of the form #name; the group is the name.
ALD_TYPE_ID_NAME = /#(#{ALD_ID_NAME})/
# A reference, i.e. a plain name; the group is the name.
ALD_TYPE_REF = /(#{ALD_ID_NAME})/
# Any of the above, preceded by whitespace or the start of the string and followed by
# whitespace or the end of the string. The groups are, in this order: key, quote character,
# value, ID, class name and reference.
ALD_TYPE_ANY = /(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
# An attribute list definition line of the form {:name: attributes}; the groups are the name
# and the attributes.
ALD_START = /^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/
# Parse the attribute list definition at the current location.
#
# The attributes are added to the (possibly already existing) definition with the same name
# and an :eob element is added to the current tree.
def parse_ald
  @src.pos += @src.matched_size
  definitions = @doc.parse_infos[:ald]
  name = @src[1]
  definitions[name] = {} unless definitions[name]
  parse_attribute_list(@src[2], definitions[name])
  @tree.children << Element.new(:eob)
  true
end
define_parser(:ald, ALD_START)
IAL_BLOCK_START = /^#{OPT_SPACE}\{:(?!:)(#{ALD_ANY_CHARS}+)\}\s*?\n/
# Parse the block inline attribute list at the current location.
#
# The attributes are added to the IAL of the preceding element. If there is no preceding
# element or if it is a :blank or :eob element, the attributes are stored as pending block
# IAL instead. An :eob element is added to the tree unless another block IAL follows.
def parse_block_ial
  @src.pos += @src.matched_size
  previous = @tree.children.last
  target = if previous && previous.type != :blank && previous.type != :eob
             previous.options[:ial] ||= {}
           else
             @block_ial = {}
           end
  parse_attribute_list(@src[1], target)
  @tree.children << Element.new(:eob) unless @src.check(IAL_BLOCK_START)
  true
end
define_parser(:block_ial, IAL_BLOCK_START)
IAL_SPAN_START = /\{:(#{ALD_ANY_CHARS}+)\}/
# Parse the span inline attribute list at the current location.
#
# The attributes are applied to the preceding element: they are merged into its IAL and its
# attributes are updated. If there is no preceding element or if it is just text, the IAL is
# ignored and a warning is issued.
def parse_span_ial
  @src.pos += @src.matched_size
  target = @tree.children.last
  if !target || target.type == :text
    warning("Ignoring span IAL because preceding element is just text")
  else
    attr = {}
    parse_attribute_list(@src[1], attr)
    update_ial_with_ial(target.options[:ial] ||= {}, attr)
    update_attr_with_ial(target.options[:attr] ||= {}, attr)
  end
end
define_parser(:span_ial, IAL_SPAN_START, '\{:')
end
end
end

View File

@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Extra characters allowed in the mail address part of an autolink; only set on Ruby 1.8.5.
ACHARS = (RUBY_VERSION == '1.8.5' ? '\x80-\xFF' : '')

# Matches "<scheme:...>" for the mailto, http(s) and ftp(s) schemes (group 2 holds the scheme)
# as well as a bare mail address in angle brackets.
AUTOLINK_START = /<((mailto|https?|ftps?):.+?|[-.\w#{ACHARS}]+@[-\w#{ACHARS}]+(\.[-\w#{ACHARS}]+)*\.[a-z]+)>/u

# Parse the autolink at the current location.
#
# Creates an :a element whose link text is the matched address without a leading "mailto:". A
# bare mail address (no scheme matched) gets "mailto:" prepended for the href.
def parse_autolink
  @src.pos += @src.matched_size
  target = @src[1]
  href = (@src[2].nil? ? "mailto:#{target}" : target)
  link = Element.new(:a, nil, {:attr => {'href' => href}})
  add_text(@src[1].sub(/^mailto:/, ''), link)
  @tree.children << link
end

define_parser(:autolink, AUTOLINK_START, '<')
end
end
end

View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Matches one or more consecutive lines that contain nothing but whitespace.
BLANK_LINE = /(?:^\s*\n)+/

# Parse the blank line at the current position.
#
# If the preceding element is already a :blank element, the matched text is appended to its
# value; otherwise a new :blank element is added. Always returns +true+.
def parse_blank_line
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :blank
    previous.value += @src.matched
  else
    @tree.children << new_block_el(:blank, @src.matched)
  end
  true
end

define_parser(:blank_line, BLANK_LINE)
end
end
end

View File

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# The blockquote marker at the start of a line, including one optional following space.
BLOCKQUOTE_START = /^#{OPT_SPACE}> ?/
# One or more consecutive lines that each start with a blockquote marker.
BLOCKQUOTE_MATCH = /(^#{OPT_SPACE}>.*?\n)+/

# Parse the blockquote at the current location.
#
# The markers are stripped from all matched lines and the remaining text is parsed as block
# content of a new :blockquote element. Always returns +true+.
def parse_blockquote
  quoted = @src.scan(BLOCKQUOTE_MATCH)
  content = quoted.gsub(BLOCKQUOTE_START, '')
  quote = new_block_el(:blockquote)
  @tree.children << quote
  parse_blocks(quote, content)
  true
end

define_parser(:blockquote, BLOCKQUOTE_START)
end
end
end

View File

@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/kramdown/blank_line'
module Kramdown
module Parser
class Kramdown
# An indented codeblock starts with the INDENT pattern (INDENT is defined outside this file).
CODEBLOCK_START = INDENT
# One or more consecutive indented lines that each contain at least one non-whitespace character.
CODEBLOCK_LINE = /(?:#{INDENT}.*?\S.*?\n)+/
# Any number of runs of codeblock lines, each run optionally preceded by blank lines. Because of
# the trailing '*' this pattern can also match the empty string.
CODEBLOCK_MATCH = /(?:#{BLANK_LINE}?#{CODEBLOCK_LINE})*/
# Parse the indented codeblock at the current location.
#
# Everything matched by CODEBLOCK_MATCH is consumed, the leading indentation of every line is
# removed and the result is stored as value of a new :codeblock element. Always returns +true+.
def parse_codeblock
  code = @src.scan(CODEBLOCK_MATCH)
  # Use the non-destructive gsub: gsub! returns nil when nothing was substituted (possible since
  # CODEBLOCK_MATCH may match the empty string), which would store nil as the element value.
  @tree.children << new_block_el(:codeblock, code.gsub(INDENT, ''))
  true
end
# Register the indented codeblock parser with CODEBLOCK_START as its start pattern.
# NOTE(review): define_parser is defined outside this chunk; its exact contract is not visible here.
define_parser(:codeblock, CODEBLOCK_START)
# A fenced codeblock starts with a line beginning with three or more tildes.
FENCED_CODEBLOCK_START = /^~{3,}/
# Opening fence line (group 1 = the tildes), the body (group 2, may span several lines because of
# the /m flag) and a closing fence line that repeats the opening tildes, optionally followed by
# further tildes.
FENCED_CODEBLOCK_MATCH = /^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m
# Parse the fenced codeblock at the current location.
#
# Returns +false+ without consuming anything if no complete fenced codeblock (opening and
# closing fence) is found; otherwise adds a :codeblock element holding the text between the
# fences and returns +true+.
def parse_codeblock_fenced
  return false unless @src.check(FENCED_CODEBLOCK_MATCH)

  @src.pos += @src.matched_size
  @tree.children << new_block_el(:codeblock, @src[2])
  true
end

define_parser(:codeblock_fenced, FENCED_CODEBLOCK_START)
end
end
end

View File

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# A codespan delimiter is a run of one or more backticks.
CODESPAN_DELIMITER = /`+/

# Parse the codespan at the current scanner location.
#
# The codespan ends at the next occurrence of the opening delimiter. A single backtick that is
# surrounded by whitespace, or a delimiter without a closing counterpart, is added as plain
# text. For delimiters longer than one backtick, one leading and one trailing space of the
# content are removed.
def parse_codespan
  delimiter = @src.scan(CODESPAN_DELIMITER)
  single_backtick = (delimiter.length == 1)
  start_pos = @src.pos

  # a lone backtick between whitespace is literal text
  if single_backtick && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
    add_text(delimiter)
    return
  end

  code = @src.scan_until(/#{delimiter}/)
  unless code
    # no closing delimiter: keep the opening delimiter as text
    @src.pos = start_pos
    return add_text(delimiter)
  end

  code.sub!(/#{delimiter}\Z/, '')
  unless single_backtick
    code = code[1..-1] if code[0..0] == ' '
    code = code[0..-2] if code[-1..-1] == ' '
  end
  @tree.children << Element.new(:codespan, code)
end
# Register the codespan parser for CODESPAN_DELIMITER.
# NOTE(review): the third argument is presumably the span start string used to locate candidate
# positions - confirm against define_parser, which is defined outside this chunk.
define_parser(:codespan, CODESPAN_DELIMITER, '`')
end
end
end

View File

@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Matches an emphasis delimiter: one or two asterisks or one or two underscores.
EMPHASIS_START = /(?:\*\*?|__?)/
# Parse the emphasis at the current location.
#
# A delimiter of length two opens a :strong element, a delimiter of length one an :em element.
# If no valid closing delimiter is found, the opening delimiter is added as plain text.
def parse_emphasis
result = @src.scan(EMPHASIS_START)
element = (result.length == 2 ? :strong : :em)
type = (result =~ /_/ ? '_' : '*')
# position directly after the opening delimiter, used when backtracking
reset_pos = @src.pos
# The delimiter is literal text if it is an underscore between two letters, if it is followed by
# whitespace, or if an element of the same type is already open (checked via @tree and @stack).
if (type == '_' && @src.pre_match =~ /[[:alpha:]]\z/ && @src.check(/[[:alpha:]]/)) || @src.check(/\s/) ||
@tree.type == element || @stack.any? {|el, _| el.type == element}
add_text(result)
return
end
# Try to parse an +elem+ element that is closed by +delim+; returns [found, element, stop regexp].
# NOTE(review): parse_spans is defined outside this chunk; the block presumably decides whether a
# match of stop_re is accepted as the closing delimiter - confirm against parse_spans.
sub_parse = lambda do |delim, elem|
el = Element.new(elem)
stop_re = /#{Regexp.escape(delim)}/
found = parse_spans(el, stop_re) do
# A closing delimiter must not be preceded by whitespace; for :em it must not be a doubled
# delimiter (unless a third one follows); an underscore delimiter must not be followed by a
# letter; and the element must have some content.
(@src.pre_match[-1, 1] !~ /\s/) &&
(elem != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) &&
(type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0
end
[found, el, stop_re]
end
found, el, stop_re = sub_parse.call(result, element)
# Fallback: if no strong emphasis could be parsed, step back by one character so that only the
# first delimiter character counts as opening delimiter, and retry as normal emphasis.
if !found && element == :strong && @tree.type != :em
@src.pos = reset_pos - 1
found, el, stop_re = sub_parse.call(type, :em)
end
if found
# consume the closing delimiter
@src.scan(stop_re)
@tree.children << el
else
@src.pos = reset_pos
add_text(result)
end
end
define_parser(:emphasis, EMPHASIS_START, '\*|_')
end
end
end

View File

@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# An end-of-block marker is a line that contains only a caret.
EOB_MARKER = /^\^\s*?\n/

# Parse the EOB marker at the current location.
#
# Consumes the marker line and adds an :eob element to the tree. Always returns +true+.
def parse_eob_marker
  @src.pos += @src.matched_size
  marker = new_block_el(:eob)
  @tree.children << marker
  true
end

define_parser(:eob_marker, EOB_MARKER)
end
end
end

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# A backslash followed by one of the characters that can be escaped; group 1 is the character.
ESCAPED_CHARS = /\\([\\.*_+`()\[\]{}#!:|"'\$-])/

# Parse the backslash-escaped character at the current location.
#
# The backslash is dropped and only the escaped character itself is added as text.
def parse_escaped_chars
  @src.pos += @src.matched_size
  escaped_char = @src[1]
  add_text(escaped_char)
end

define_parser(:escaped_chars, ESCAPED_CHARS, '\\\\')
end
end
end

View File

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/kramdown/attribute_list'
module Kramdown
module Parser
class Kramdown
# Parse the extension tag that has just been matched by EXT_BLOCK_START or EXT_SPAN_START.
#
# +type+ is either :block or :span. A stray stop tag is ignored with a warning. For a start tag,
# the extension name, its options and - unless the tag is written in the body-less form - its
# body (everything up to the matching stop tag) are extracted and passed to #handle_extension.
def parse_extension_start_tag(type)
@src.pos += @src.matched_size
# Group 4 is the name given in a stop tag ("{:/name}"); a stop tag without a name is "{:/}".
# NOTE(review): the group numbers assume that ALD_ANY_CHARS contains no capturing groups - confirm.
if @src[4] || @src.matched == '{:/}'
name = (@src[4] ? "for '#{@src[4]}' " : '')
warning("Invalid extension stop tag #{name}found - ignoring it")
return
end
ext = @src[1]
opts = {}
body = nil
parse_attribute_list(@src[2] || '', opts)
# group 3 is the slash of the body-less form; without it the body runs up to the stop tag
if !@src[3]
stop_re = (type == :block ? /#{EXT_BLOCK_STOP_STR % ext}/ : /#{EXT_STOP_STR % ext}/)
if result = @src.scan_until(stop_re)
# remove the stop tag itself from the scanned text
body = result.sub!(stop_re, '')
body.chomp! if type == :block
else
warning("No stop tag for extension '#{ext}' found - treating it as extension without body")
end
end
handle_extension(ext, opts, body, type)
end
# Handle the extension +name+ with the parsed options +opts+, the extension +body+ (+nil+ if the
# extension has no body) and the +type+ (:block or :span).
#
# * 'comment' adds a :comment element and 'nomarkdown' a :raw element, both only if a body exists.
# * 'options' sets every known kramdown option and warns about each unknown one.
# * Any other name results in a warning.
def handle_extension(name, opts, body, type)
  case name
  when 'comment'
    if body.kind_of?(String)
      @tree.children << Element.new(:comment, body, :category => type)
    end
  when 'nomarkdown'
    if body.kind_of?(String)
      @tree.children << Element.new(:raw, body, :category => type)
    end
  when 'options'
    # set all known options first, collecting the unknown ones
    unknown = opts.select do |key, value|
      key = key.to_sym
      next true unless Kramdown::Options.defined?(key)
      # an unparsable value leaves the current option value unchanged
      @doc.options[key] = Kramdown::Options.parse(key, value) rescue @doc.options[key]
      false
    end
    unknown.each do |key, _|
      warning("Unknown kramdown option '#{key}'")
    end
  else
    warning("Invalid extension name '#{name}' specified - ignoring extension")
  end
end
# Format template (for String#%) for an extension stop tag, i.e. "{:/}" or "{:/name}".
EXT_STOP_STR = "\\{:/(%s)?\\}"
# Pattern source for an extension start tag "{::name options}" or "{::name options /}": the
# name, the option string and the slash of the body-less form are captured.
EXT_START_STR = "\\{::(\\w+)(?:\\s(#{ALD_ANY_CHARS}*?)|)(\\/)?\\}"
# A start or stop tag anywhere in a span.
EXT_SPAN_START = /#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME}/
# A start or stop tag on a line of its own.
EXT_BLOCK_START = /^#{OPT_SPACE}(?:#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME})\s*?\n/
# Format template for a stop tag on a line of its own. The backslashes have to be doubled here:
# inside a double-quoted string "\s" is a plain space character, so the undoubled form only
# accepted spaces (but e.g. no tabs) after the stop tag, unlike EXT_BLOCK_START.
EXT_BLOCK_STOP_STR = "^#{OPT_SPACE}#{EXT_STOP_STR}\\s*?\\n"

# Parse the extension block at the current location. Always returns +true+.
def parse_block_extension
  parse_extension_start_tag(:block)
  true
end
define_parser(:block_extension, EXT_BLOCK_START)

# Parse the extension span at the current location.
def parse_span_extension
  parse_extension_start_tag(:span)
end
define_parser(:span_extension, EXT_SPAN_START, '\{:[:/]')
end
end
end

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/kramdown/attribute_list'
require 'kramdown/parser/kramdown/blank_line'
require 'kramdown/parser/kramdown/codeblock'
module Kramdown
module Parser
class Kramdown
# Matches a footnote definition "[^name]: text": group 1 is the footnote name, group 2 the first
# line of the content plus all following indented lines (optionally separated by blank lines).
FOOTNOTE_DEFINITION_START = /^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n(?:#{BLANK_LINE}?#{CODEBLOCK_LINE})*)/

# Parse the footnote definition at the current location.
#
# The content is parsed as block-level text (with the indentation removed) into a :footnote_def
# element, which is then stored under the footnote name in the :footnotes parse infos of the
# document. An existing definition with the same name is overwritten with a warning. Always
# returns +true+.
def parse_footnote_definition
  @src.pos += @src.matched_size
  definition = Element.new(:footnote_def)
  parse_blocks(definition, @src[2].gsub(INDENT, ''))

  name = @src[1]
  footnotes = @doc.parse_infos[:footnotes]
  warning("Duplicate footnote name '#{name}' - overwriting") if footnotes[name]
  footnotes[name] = {:content => definition}
  true
end

define_parser(:footnote_definition, FOOTNOTE_DEFINITION_START)
# Matches a footnote marker "[^name]"; group 1 is the footnote name.
FOOTNOTE_MARKER_START = /\[\^(#{ALD_ID_NAME})\]/
# Parse the footnote marker at the current location.
#
# A :footnote element is added if a definition for the footnote name exists and no (still valid)
# marker for it has been added before. Otherwise a warning is issued and the matched text is
# added as plain text.
def parse_footnote_marker
@src.pos += @src.matched_size
fn_def = @doc.parse_infos[:footnotes][@src[1]]
if fn_def
# A previously stored marker is only valid if every element on its recorded path still contains
# the next element of the path as child.
# NOTE(review): presumably this detects markers whose surrounding element was discarded later on
# (e.g. by a span parser that backtracked) - confirm.
valid = fn_def[:marker] && fn_def[:marker].options[:stack][0..-2].zip(fn_def[:marker].options[:stack][1..-1]).all? do |par, child|
par.children.include?(child)
end
if !fn_def[:marker] || !valid
fn_def[:marker] = Element.new(:footnote, nil, :name => @src[1])
# record the path to the marker: the elements on @stack, the current element and the marker
fn_def[:marker].options[:stack] = [@stack.map {|s| s.first}, @tree, fn_def[:marker]].flatten.compact
@tree.children << fn_def[:marker]
else
warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker")
add_text(@src.matched)
end
else
warning("Footnote definition for '#{@src[1]}' not found")
add_text(@src.matched)
end
end
define_parser(:footnote_marker, FOOTNOTE_MARKER_START, '\[')
end
end
end

View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Optional header ID of the form " {#id}" at the end of a header line; group 1 is the ID.
HEADER_ID = /(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
# A text line (with optional header ID) that is followed by a line consisting only of '-' or '='
# characters.
SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/

# Parse the Setext header at the current location.
#
# Returns +false+ without consuming anything if the preceding element is not a blank line.
# Otherwise a :header element is added (level 2 for '-' underlines, level 1 for '=' underlines)
# and +true+ is returned.
def parse_setext_header
  previous = @tree.children.last
  return false if previous && previous.type != :blank

  @src.pos += @src.matched_size
  text = @src[1].strip
  id = @src[2]
  underline = @src[3]
  header = new_block_el(:header, nil, :level => (underline == '-' ? 2 : 1), :raw_text => text)
  add_text(text, header)
  header.options[:attr] = {'id' => id} if id
  @tree.children << header
  true
end

define_parser(:setext_header, SETEXT_HEADER_START)
# An Atx header starts with one to six hash characters at the beginning of a line.
ATX_HEADER_START = /^\#{1,6}/
# A complete Atx header line: the hash characters (group 1), the header text (group 2, at least
# one character), optional closing hash characters and an optional header ID (group 3).
ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
# Parse the Atx header at the current location.
#
# Returns +false+ without consuming anything if the preceding element is not a blank line or if
# the line is not a complete Atx header (e.g. a line that only consists of hash characters).
# Otherwise a :header element with the appropriate level is added and +true+ is returned.
def parse_atx_header
  previous = @tree.children.last
  return false if previous && previous.type != :blank

  # ATX_HEADER_START only guarantees the leading hash characters; the full pattern may still
  # fail to match, in which case all match groups are nil and must not be used.
  return false unless @src.scan(ATX_HEADER_MATCH)

  level, text, id = @src[1], @src[2].strip, @src[3]
  el = new_block_el(:header, nil, :level => level.length, :raw_text => text)
  add_text(text, el)
  el.options[:attr] = {'id' => id} if id
  @tree.children << el
  true
end
# Register the Atx header parser with ATX_HEADER_START as its start pattern; the complete header
# line is matched inside parse_atx_header itself.
define_parser(:atx_header, ATX_HEADER_START)
end
end
end

View File

@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# A horizontal rule line: at least three identical '*', '-' or '_' characters, optionally
# separated by spaces or tabs.
HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/

# Parse the horizontal rule at the current location.
#
# Consumes the rule line and adds an :hr element to the tree. Always returns +true+.
def parse_horizontal_rule
  @src.pos += @src.matched_size
  rule = new_block_el(:hr)
  @tree.children << rule
  true
end

define_parser(:horizontal_rule, HR_START)
end
end
end

View File

@@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/html'
module Kramdown
module Parser
class Kramdown
include Kramdown::Parser::Html::Parser
# Callback for HTML start tags found while parsing block-level HTML; it is passed as block to
# handle_html_start_tag (see parse_block_html) and to parse_raw_html.
#
# Determines how the content of the element +el+ has to be parsed (:raw, :span or :block),
# stores this information in the element options and, unless +closed+ is set, parses the content.
# NOTE(review): +closed+ presumably tells that the tag has no separate content (e.g. a
# self-closing tag) - confirm against handle_html_start_tag.
def handle_kramdown_html_tag(el, closed)
# Inside an HTML element that is parsed raw everything is raw. Otherwise the parse type is taken
# from HTML_PARSE_AS if the parse_block_html option is set and is :raw if it is not set.
parse_type = if @tree.type != :html_element || @tree.options[:parse_type] != :raw
(@doc.options[:parse_block_html] ? HTML_PARSE_AS[el.value] : :raw)
else
:raw
end
# An explicit 'markdown' attribute overrides the parse type; the attribute itself is removed.
if val = html_parse_type(el.options[:attr].delete('markdown'))
parse_type = (val == :default ? HTML_PARSE_AS[el.value] : val)
end
# for block parsing, skip the rest of the line after the start tag
@src.scan(/[ \t]*\n/) if parse_type == :block
el.options[:outer_element] = true if @tree.type != :html_element
el.options[:parent_is_raw] = true if @tree.type == :html_element && @tree.options[:parse_type] == :raw
el.options[:parse_type] = parse_type
if !closed
if parse_type == :block
# NOTE(review): parse_blocks presumably returns a true value when parse_block_html stops the
# block parsing at the matching end tag (throw :stop_block_parsing) - confirm.
end_tag_found = parse_blocks(el)
if !end_tag_found
warning("Found no end tag for '#{el.value}' - auto-closing it")
end
elsif parse_type == :span
curpos = @src.pos
# take everything up to (but not including) the end tag as text, then consume the end tag
if result = @src.scan_until(/(?=<\/#{el.value}\s*>)/m)
add_text(extract_string(curpos...@src.pos, @src), el)
@src.scan(HTML_TAG_CLOSE_RE)
else
# no end tag: the rest of the source becomes the content
add_text(@src.scan(/.*/m), el)
warning("Found no end tag for '#{el.value}' - auto-closing it")
end
else
# raw content: nested start tags are handled by this method again
parse_raw_html(el, &method(:handle_kramdown_html_tag))
end
# skip the rest of the line after the end tag, except when inside a raw HTML element
@src.scan(/[ \t]*\n/) unless (@tree.type == :html_element && @tree.options[:parse_type] == :raw)
end
end
# Return the HTML parse type defined by the string +val+ (the value of a 'markdown' attribute):
#
# "0":: +:raw+
# "1":: +:default+ (the caller uses the default parse type of the element)
# "span":: +:span+
# "block":: +:block+
#
# If +val+ is +nil+, +nil+ is returned. For any other value a warning is issued and +nil+ is
# returned, too.
def html_parse_type(val)
  return nil if val.nil?

  parse_type = {"0" => :raw, "1" => :default, "span" => :span, "block" => :block}[val]
  return parse_type if parse_type

  warning("Invalid markdown attribute val '#{val}', using default")
  nil
end
# Start of something that may be block-level HTML: an opening tag name, a processing
# instruction ("<?"), a comment ("<!--") or a closing tag ("</") at the beginning of a line.
HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
# Parse the HTML at the current position as block level HTML.
#
# Returns +true+ if a comment, a processing instruction or a tag of a non-span HTML element was
# handled and +false+ otherwise. If the closing tag of the HTML element that is currently being
# parsed is found, block parsing is stopped via <tt>throw :stop_block_parsing, :found</tt>.
def parse_block_html
if result = @src.scan(HTML_COMMENT_RE)
@tree.children << Element.new(:xml_comment, result, :category => :block)
# skip the rest of the line
@src.scan(/[ \t]*\n/)
true
elsif result = @src.scan(HTML_INSTRUCTION_RE)
@tree.children << Element.new(:xml_pi, result, :category => :block)
@src.scan(/[ \t]*\n/)
true
else
# Note: due to operator precedence +result+ is assigned the value of the whole && expression;
# it is not used afterwards.
if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
@src.pos += @src.matched_size
handle_html_start_tag(&method(:handle_kramdown_html_tag))
Kramdown::Parser::Html::ElementConverter.new(@doc).process(@tree.children.last) if @doc.options[:html_to_native]
true
elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
@src.pos += @src.matched_size
name = @src[1]
# the closing tag of the currently parsed HTML element ends the block parsing for that element
if @tree.type == :html_element && @tree.value == name
throw :stop_block_parsing, :found
else
warning("Found invalidly used HTML closing tag for '#{name}' - ignoring it")
true
end
else
false
end
end
end
define_parser(:block_html, HTML_BLOCK_START)
# Start of something that may be span-level HTML: an opening tag name, a processing instruction
# ("<?"), a comment ("<!--") or a closing tag ("</").
HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
# Parse the HTML at the current position as span level HTML.
#
# Comments and processing instructions are added as :xml_comment and :xml_pi elements, a stray
# closing tag is ignored with a warning and a start tag leads to an :html_element element whose
# content is parsed up to the matching closing tag. If nothing of this matches, a single
# character is added as text.
def parse_span_html
if result = @src.scan(HTML_COMMENT_RE)
@tree.children << Element.new(:xml_comment, result, :category => :span)
elsif result = @src.scan(HTML_INSTRUCTION_RE)
@tree.children << Element.new(:xml_pi, result, :category => :span)
elsif result = @src.scan(HTML_TAG_CLOSE_RE)
warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
elsif result = @src.scan(HTML_TAG_RE)
# NOTE(review): the start tag of a block-level element has already been consumed by the scan
# above and is dropped here without output or warning - confirm that this is intended.
return if HTML_BLOCK_ELEMENTS.include?(@src[1])
# NOTE(review): reset_pos is assigned but not used in this method.
reset_pos = @src.pos
attrs = {}
# collect the attributes; newlines inside attribute values are replaced by a space
@src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
# The content is never parsed for elements listed in HTML_PARSE_AS_RAW or inside raw content;
# otherwise the parse_span_html option decides.
do_parsing = (HTML_PARSE_AS_RAW.include?(@src[1]) || @tree.options[:parse_type] == :raw ? false : @doc.options[:parse_span_html])
# An explicit 'markdown' attribute overrides this; the attribute itself is removed.
if val = html_parse_type(attrs.delete('markdown'))
if val == :block
warning("Cannot use block level parsing in span level HTML tag - using default mode")
elsif val == :span
do_parsing = true
elsif val == :default
do_parsing = !HTML_PARSE_AS_RAW.include?(@src[1])
elsif val == :raw
do_parsing = false
end
end
el = Element.new(:html_element, @src[1], :attr => attrs, :category => :span, :parse_type => (do_parsing ? :span : :raw))
@tree.children << el
stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
# NOTE(review): group 4 of HTML_TAG_RE is presumably the slash of a self-closing tag - confirm
# against the definition of HTML_TAG_RE.
if !@src[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
elsif !@src[4]
# NOTE(review): the third argument presumably restricts the span parsers to :span_html when the
# content should not be parsed - confirm against parse_spans.
if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]))
@src.scan(stop_re)
else
warning("Found no end tag for '#{el.value}' - auto-closing it")
# no end tag: the rest of the source becomes the content
add_text(@src.scan(/.*/m), el)
end
end
Kramdown::Parser::Html::ElementConverter.new(@doc).process(el) if @doc.options[:html_to_native]
else
add_text(@src.scan(/./))
end
end
define_parser(:span_html, HTML_SPAN_START, '<')
end
end
end

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/html'
module Kramdown
module Parser
class Kramdown
# Parse the HTML entity at the current location.
#
# The entity is looked up by the first available match group: the value of group 1 as is, the
# value of group 2 converted to an integer or the value of group 3 interpreted as hexadecimal
# number. The result is added as :entity element.
def parse_html_entity
  @src.pos += @src.matched_size
  key = if @src[1]
          @src[1]
        elsif @src[2]
          @src[2].to_i
        else
          @src[3].hex
        end
  @tree.children << Element.new(:entity, ::Kramdown::Utils::Entities.entity(key))
end

define_parser(:html_entity, Kramdown::Parser::Html::Constants::HTML_ENTITY_RE, '&')
end
end
end

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# The line break markup directly before a newline; the newline itself is not consumed.
LINE_BREAK = /( |\\\\)(?=\n)/

# Parse the line break at the current location.
#
# Consumes the line break markup and adds a :br element to the tree.
def parse_line_break
  @src.pos += @src.matched_size
  line_break = Element.new(:br)
  @tree.children << line_break
end

define_parser(:line_break, LINE_BREAK, '( |\\\\)(?=\n)')
end
end
end

View File

@@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Punctuation characters that are allowed in a link ID.
PUNCTUATION_CHARS = "_.:,;!?-"
# A single character that is allowed in a link ID.
LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
# A single character that is not allowed in a link ID.
LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
# A link definition "[id]: url 'title'": group 1 is the ID, group 2 or 3 the URL (with or
# without angle brackets) and group 5 the optional title, which may be on the following line.
LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/

# Parse the link definition at the current location.
#
# The URL and the title are stored under the downcased link ID in the :link_defs parse infos of
# the document; an existing definition is overwritten with a warning. Always returns +true+.
def parse_link_definition
  @src.pos += @src.matched_size
  id = @src[1].downcase
  url = @src[2] || @src[3]
  title = @src[5]
  warning("Duplicate link ID '#{id}' - overwriting") if @doc.parse_infos[:link_defs][id]
  @doc.parse_infos[:link_defs][id] = [url, title]
  true
end

define_parser(:link_definition, LINK_DEFINITION_START)
# This helper method adds the appropriate attributes to the element +el+ of type +a+ or +img+
# and the element itself to the <tt>@tree</tt>.
#
# A link gets the +href+ attribute. An image gets the +src+ and +alt+ attributes (the latter
# set to +alt_text+) and loses all its children. The +title+ attribute is only set if +title+
# is given.
def add_link(el, href, title, alt_text = nil)
  attributes = (el.options[:attr] ||= {})
  attributes['title'] = title if title
  if el.type == :a
    attributes['href'] = href
  else
    attributes['src'] = href
    attributes['alt'] = alt_text
    el.children.clear
  end
  @tree.children << el
end
# An escaped or unescaped square bracket.
LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
# The link ID part of a reference style link, i.e. "[id]" or "[]"; group 1 is the optional ID.
LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
# The quoted title of an inline link including the closing parenthesis; group 2 is the title.
LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/
# Start of a link ("[") or an image link ("![") that is not directly followed by a caret.
LINK_START = /!?\[(?=[^^])/
# Parse the link at the current scanner position. This method is used to parse normal links as
# well as image links.
#
# If the markup does not form a valid link, the scanner is reset to the position after the
# opening bracket and the opening bracket is added as plain text.
def parse_link
result = @src.scan(LINK_START)
# position directly after the opening bracket, used when backtracking
reset_pos = @src.pos
link_type = (result =~ /^!/ ? :img : :a)
# no nested links allowed
if link_type == :a && (@tree.type == :img || @tree.type == :a || @stack.any? {|t,s| t && (t.type == :img || t.type == :a)})
add_text(result)
return
end
el = Element.new(link_type)
stop_re = /\]|!?\[/
# Parse the link text while keeping track of the bracket nesting level.
# NOTE(review): the number of :img children is subtracted presumably because the closing bracket
# of a nested image is consumed by the nested parse_link call and therefore never decrements the
# counter here - confirm against parse_spans.
count = 1
found = parse_spans(el, stop_re) do
case @src.matched
when "[", "!["
count += 1
when "]"
count -= 1
end
count - el.children.select {|c| c.type == :img}.size == 0
end
if !found || (link_type == :a && el.children.empty?)
@src.pos = reset_pos
add_text(result)
return
end
# the unparsed link text; also used for deriving the link ID when no explicit ID is given
alt_text = extract_string(reset_pos...@src.pos, @src)
conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
# consume the closing bracket
@src.scan(stop_re)
# reference style link or no link url
if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
link_id = (@src[1] || conv_link_id).downcase
if link_id.empty?
@src.pos = reset_pos
add_text(result)
elsif @doc.parse_infos[:link_defs].has_key?(link_id)
add_link(el, @doc.parse_infos[:link_defs][link_id].first, @doc.parse_infos[:link_defs][link_id].last, alt_text)
else
warning("No link definition for link ID '#{link_id}' found")
@src.pos = reset_pos
add_text(result)
end
return
end
# link url in parentheses
if @src.scan(/\(<(.*?)>/)
link_url = @src[1]
if @src.scan(/\)/)
add_link(el, link_url, nil, alt_text)
return
end
else
# Collect the URL up to the closing parenthesis that balances the opening one or up to the
# first whitespace character; parentheses inside the URL are counted.
link_url = ''
re = /\(|\)|\s/
nr_of_brackets = 0
while temp = @src.scan_until(re)
link_url += temp
case @src.matched
when /\s/
break
when '('
nr_of_brackets += 1
when ')'
nr_of_brackets -= 1
break if nr_of_brackets == 0
end
end
# strip the opening parenthesis and the terminating character (")" or whitespace)
link_url = link_url[1..-2]
if nr_of_brackets == 0
add_link(el, link_url, nil, alt_text)
return
end
end
# at this point only a quoted title followed by the closing parenthesis is valid
if @src.scan(LINK_INLINE_TITLE_RE)
add_link(el, link_url, @src[2], alt_text)
else
@src.pos = reset_pos
add_text(result)
end
end
define_parser(:link, LINK_START, '!?\[')
end
end
end

View File

@@ -0,0 +1,232 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/kramdown/blank_line'
require 'kramdown/parser/kramdown/eob'
require 'kramdown/parser/kramdown/horizontal_rule'
require 'kramdown/parser/kramdown/attribute_list'
module Kramdown
module Parser
class Kramdown
# Used for parsing the first line of a list item or a definition, i.e. the line with list item
# marker or the definition marker.
#
# +indentation+ is the width of the marker and +content+ the rest of the line; +content+ is
# modified in place (leading tabs are expanded, leading whitespace is removed).
#
# Returns the array <tt>[content, indentation, content_re, indent_re]</tt> where +indentation+
# is the indentation of the item content, +content_re+ matches a line that belongs to the item
# and +indent_re+ matches the indentation that has to be removed from such a line. If the line
# contains nothing except an optional IAL, the indentation is fixed to 4.
def parse_first_list_line(indentation, content)
  if content =~ /^\s*(#{IAL_SPAN_START})?\s*\n/
    indentation = 4
  else
    # expand leading tabs to spaces with respect to tab stops at every fourth column
    while content =~ /^ *\t/
      column = content[/^ */].length + indentation
      content.sub!(/^( *)(\t+)/) do
        spaces, tabs = $1, $2
        spaces + " " * (4 - (column % 4)) + " " * (tabs.length - 1) * 4
      end
    end
    indentation += content[/^ */].length
  end
  content.sub!(/^\s*/, '')

  full_levels, remainder = indentation.divmod(4)
  indent_re = /^ {#{indentation}}/
  content_re = /^(?:(?:\t| {4}){#{full_levels}} {#{remainder}}|(?:\t| {4}){#{full_levels + 1}}).*?\n/
  [content, indentation, content_re, indent_re]
end
# Start of an unordered list item: the marker '+', '*' or '-' (group 1) followed by the rest of
# the line (group 2).
# NOTE(review): the character class [\t| ] also accepts a literal '|' directly after the marker;
# probably only tab and space were intended - confirm before changing, as parse_list builds
# patterns of the same form.
LIST_START_UL = /^(#{OPT_SPACE}[+*-])([\t| ].*?\n)/
# Start of an ordered list item: a number followed by a period (group 1) and the rest of the
# line (group 2).
LIST_START_OL = /^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
# Start of any list item.
LIST_START = /#{LIST_START_UL}|#{LIST_START_OL}/
# Parse the ordered or unordered list at the current location.
#
# Returns +false+ without consuming anything when the previous element of the tree is a
# paragraph. Otherwise a :ul or :ol element is appended to the tree and +true+ is returned.
#
# The work is done in two passes: first the raw text of every item is collected in the +value+
# of a :li element, then the collected text of each item is parsed into block level elements.
def parse_list
if @tree.children.last && @tree.children.last.type == :p # last element must not be a paragraph
return false
end
# the marker of the first line decides the list type and the pattern used to find further items
type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
list = new_block_el(type)
item = nil
indent_re = nil
content_re = nil
eob_found = false
nested_list_found = false
# pass 1: collect the raw text of each list item
while !@src.eos?
if @src.check(HR_START)
# a horizontal rule ends the list
break
elsif @src.scan(list_start_re)
# a new list item starts here
item = Element.new(:li)
item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
list.children << item
# an IAL directly after the marker is stored in the options of the item and removed from its text
item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match|
parse_attribute_list($~[1], item.options[:ial] ||= {})
''
end
# markers of following items may be indented by at most three spaces and must stay left of the
# content column of this item
list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
/^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
nested_list_found = false
elsif result = @src.scan(content_re)
# an indented continuation line of the current item: convert leading tabs to four spaces each
# and strip the indentation of the item
result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
result.sub!(indent_re, '')
if !nested_list_found && result =~ LIST_START
# The line looks like the start of a nested list and neither such a line nor a blank line has
# been seen for this item yet: parse the text collected so far. If it forms exactly one
# paragraph, that paragraph is kept as child and the text is reset; otherwise the parsed
# children are dropped again and the collected text is kept.
parse_blocks(item, item.value)
if item.children.length == 1 && item.children.first.type == :p
item.value = ''
else
item.children.clear
end
nested_list_found = true
end
item.value << result
elsif result = @src.scan(BLANK_LINE)
# blank lines belong to the current item; they also disable the nested list check above
nested_list_found = true
item.value << result
elsif @src.scan(EOB_MARKER)
# an explicit end-of-block marker ends the list
eob_found = true
break
else
break
end
end
@tree.children << list
# pass 2: parse the collected text of every item into block level elements
last = nil
list.children.each do |it|
temp = Element.new(:temp)
parse_blocks(temp, it.value)
it.children += temp.children
it.value = nil
next if it.children.size == 0
# The leading paragraph of an item is flagged as :transparent when it is not directly followed
# by a blank element (a lone trailing blank of the last item counts as "not followed" unless an
# EOB marker was found) and, if the item is the last one of several, at least one earlier item
# does not start with a plain (non-transparent) paragraph.
# NOTE(review): how :transparent is rendered is decided outside this file -- confirm in the converters.
if it.children.first.type == :p && (it.children.length < 2 || it.children[1].type != :blank ||
(it == list.children.last && it.children.length == 2 && !eob_found)) &&
(list.children.last != it || list.children.size == 1 ||
list.children[0..-2].any? {|cit| cit.children.first.type != :p || cit.children.first.options[:transparent]})
# separate the paragraph text from a directly following non-blank block by a newline
it.children.first.children.first.value += "\n" if it.children.size > 1 && it.children[1].type != :blank
it.children.first.options[:transparent] = true
end
# a trailing blank element is removed from the item; only the one of the last item is remembered
if it.children.last.type == :blank
last = it.children.pop
else
last = nil
end
end
# the trailing blank of the last item is re-added after the list unless an EOB marker ended it
@tree.children << last if !last.nil? && !eob_found
true
end
# define_parser is declared outside this file; presumably it registers parse_list under the name
# :list with LIST_START as its start pattern -- confirm there.
define_parser(:list, LIST_START)
# Start of a definition: the OPT_SPACE prefix followed by a colon (capture 1) and then the rest
# of the line including the newline (capture 2).
# NOTE(review): the class [\t| ] accepts a literal "|" besides tab and space -- confirm whether
# that is intended.
DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
# Parse the definition list at the current location.
#
# The terms are taken from the paragraph preceding the first definition, one term per line.
# Returns +false+ without changing anything unless the last element of the tree is a paragraph,
# or a blank element with the value "\n" that directly follows a paragraph. Otherwise the
# definition list is added to the tree (or merged into a preceding :dl element) and +true+ is
# returned.
def parse_definition_list
children = @tree.children
if !children.last || (children.length == 1 && children.last.type != :p ) ||
(children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
return false
end
first_as_para = false
deflist = new_block_el(:dl)
# remove the paragraph holding the terms from the tree; a blank element between the terms and
# the first definition is dropped and remembered via first_as_para
para = @tree.children.pop
if para.type == :blank
para = @tree.children.pop
first_as_para = true
end
# every line of the paragraph text becomes one term
para.children.first.value.split("\n").each do |term|
el = Element.new(:dt)
el.children << Element.new(:raw_text, term)
deflist.children << el
end
item = nil
indent_re = nil
content_re = nil
def_start_re = DEFINITION_LIST_START
# pass 1: collect the raw text of each definition
while !@src.eos?
if @src.scan(def_start_re)
# a new definition starts here
item = Element.new(:dd)
item.options[:first_as_para] = first_as_para
item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
deflist.children << item
# an IAL directly after the marker is stored in the options of the item and removed from its text
item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match|
parse_attribute_list($~[1], item.options[:ial] ||= {})
''
end
# markers of following definitions may be indented by at most three spaces and must stay left
# of the content column of this definition
def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
first_as_para = false
elsif result = @src.scan(content_re)
# an indented continuation line of the current definition: convert leading tabs to four spaces
# each and strip the indentation of the definition
result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
result.sub!(indent_re, '')
item.value << result
first_as_para = false
elsif result = @src.scan(BLANK_LINE)
# a blank line belongs to the current definition; a definition starting right after it gets
# the :first_as_para option set
first_as_para = true
item.value << result
else
break
end
end
# pass 2: parse the collected text of every definition into block level elements
last = nil
deflist.children.each do |it|
next if it.type == :dt
parse_blocks(it, it.value)
it.value = nil
next if it.children.size == 0
# a trailing blank element is removed from the definition; only the one of the last definition
# is remembered
if it.children.last.type == :blank
last = it.children.pop
else
last = nil
end
# The leading paragraph is flagged as :transparent unless the definition was preceded by a
# blank line (the helper option :first_as_para is removed here in any case).
# NOTE(review): how :transparent is rendered is decided outside this file -- confirm in the converters.
if it.children.first.type == :p && !it.options.delete(:first_as_para)
it.children.first.children.first.value += "\n" if it.children.size > 1
it.children.first.options[:transparent] = true
end
end
# merge with a directly preceding definition list (optionally separated by one blank element,
# which is dropped), otherwise add the new list to the tree
if @tree.children.length >= 1 && @tree.children.last.type == :dl
@tree.children[-1].children += deflist.children
elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
@tree.children.pop
@tree.children[-1].children += deflist.children
else
@tree.children << deflist
end
# re-add the trailing blank element of the last definition after the list
@tree.children << last if !last.nil?
true
end
# define_parser is declared outside this file; presumably it registers parse_definition_list under
# the name :definition_list with DEFINITION_LIST_START as its start pattern -- confirm there.
define_parser(:definition_list, DEFINITION_LIST_START)
end
end
end

View File

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# A math block: the OPT_SPACE prefix, an optional escaping backslash (capture 1), the formula
# enclosed in "$$" delimiters (capture 2, may span several lines because of the /m flag) and
# optional whitespace up to the end of the line.
BLOCK_MATH_START = /^#{OPT_SPACE}(\\)?\$\$(.*?)\$\$\s*?\n/m
# Parse the math block at the current location.
#
# Works on the match data currently held by the scanner (capture 1: escaping backslash,
# capture 2: formula). For an escaped start only the leading spaces and the backslash are
# consumed and +false+ is returned; otherwise the whole match is consumed, a :math block element
# with the formula is appended to the tree and +true+ is returned.
def parse_block_math
  escaped = !@src[1].nil?
  if escaped
    @src.scan(/^#{OPT_SPACE}\\/)
    false
  else
    formula = @src[2]
    @src.pos += @src.matched_size
    @tree.children << new_block_el(:math, formula, :category => :block)
    true
  end
end
# define_parser is declared outside this file; presumably it registers parse_block_math under the
# name :block_math with BLOCK_MATH_START as its start pattern -- confirm there.
define_parser(:block_math, BLOCK_MATH_START)
# An inline formula: the shortest possible text (capture 1) enclosed in "$$" delimiters.
INLINE_MATH_START = /\$\$(.*?)\$\$/
# Parse the inline math at the current location.
#
# Consumes the current match of the scanner and appends a :math span element holding the
# formula (capture 1) to the tree.
def parse_inline_math
  formula = @src[1]
  @src.pos += @src.matched_size
  math_el = Element.new(:math, formula, :category => :span)
  @tree.children << math_el
end
# define_parser is declared outside this file; presumably it registers parse_inline_math under the
# name :inline_math with INLINE_MATH_START as its pattern, the third argument being the string
# form of a pattern for the start of the element -- confirm there.
define_parser(:inline_math, INLINE_MATH_START, '\$')
end
end
end

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# A paragraph line: the OPT_SPACE prefix followed by a character that is neither a space nor a
# tab, and then the rest of the line including the newline.
PARAGRAPH_START = /^#{OPT_SPACE}[^ \t].*?\n/
# Parse the paragraph at the current location.
#
# Consumes the line currently matched by the scanner. If the last element of the tree is a
# paragraph, the line (without its newline) is appended to the text of that paragraph, separated
# by a newline; otherwise a new :p element is added and the line, stripped of leading whitespace
# and its newline, becomes its text. Always returns +true+.
def parse_paragraph
  line = @src.matched
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << line.chomp
  else
    paragraph = new_block_el(:p)
    @tree.children << paragraph
    add_text(line.lstrip.chomp, paragraph)
  end
  true
end
# define_parser is declared outside this file; presumably it registers parse_paragraph under the
# name :paragraph with PARAGRAPH_START as its start pattern -- confirm there.
define_parser(:paragraph, PARAGRAPH_START)
end
end
end

View File

@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
#--
# Parts of this file are based on code from Maruku by Andrea Censi.
# The needed license statements follow:
#
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# NOTA BENE:
#
# The following algorithm is a rip-off of RubyPants written by
# Christian Neukirchen.
#
# RubyPants is a Ruby port of SmartyPants written by John Gruber.
#
# This file is distributed under the GPL, which I guess is compatible
# with the terms of the RubyPants license.
#
# -- Andrea Censi
#
# = RubyPants -- SmartyPants ported to Ruby
#
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
# Copyright (C) 2004 Christian Neukirchen
#
# Incorporates ideas, comments and documentation by Chad Miller
# Copyright (C) 2004 Chad Miller
#
# Original SmartyPants by John Gruber
# Copyright (C) 2003 John Gruber
#
#
# = RubyPants -- SmartyPants ported to Ruby
#
#
# [snip]
#
# == Authors
#
# John Gruber did all of the hard work of writing this software in
# Perl for Movable Type and almost all of this useful documentation.
# Chad Miller ported it to Python to use with Pyblosxom.
#
# Christian Neukirchen provided the Ruby port, as a general-purpose
# library that follows the *Cloth API.
#
#
# == Copyright and License
#
# === SmartyPants license:
#
# Copyright (c) 2003 John Gruber
# (http://daringfireball.net)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# * Neither the name "SmartyPants" nor the names of its contributors
# may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
# === RubyPants license
#
# RubyPants is a derivative work of SmartyPants and smartypants.py.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
# == Links
#
# John Gruber:: http://daringfireball.net
# SmartyPants:: http://daringfireball.net/projects/smartypants
#
# Chad Miller:: http://web.chad.org
#
# Christian Neukirchen:: http://kronavita.de/chris
#
#++
#
module Kramdown
module Parser
class Kramdown
# Source of a character class (kept as string for interpolation into the rules below) that
# matches one ASCII punctuation character.
SQ_PUNCT = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
# Source of a character class matching any character except space, backslash, tab, carriage
# return, newline, "[", "{", "(" and "-".
SQ_CLOSE = %![^\ \\\\\t\r\n\\[{(-]!
# The smart quote rules, tried in order. Each entry pairs a regexp with a list describing the
# output: an Integer stands for the text of that capture group, a Symbol either names the quote
# directly (e.g. :ldquo) or is a key into SQ_SUBSTS whose trailing digit gives the capture group
# holding the matched quote character (e.g. :rquote1).
SQ_RULES = [
[/("|')(?=#{SQ_PUNCT}\B)/, [:rquote1]],
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
[/(\s?)"'(?=\w)/, [1, :ldquo, :lsquo]],
[/(\s?)'"(?=\w)/, [1, :lsquo, :ldquo]],
# Special case for decade abbreviations (the '80s):
[/(\s?)'(?=\d\ds)/, [1, :rsquo]],
# Get most opening single/double quotes:
[/(\s)('|")(?=\w)/, [1, :lquote2]],
# Single/double closing quotes:
[/(#{SQ_CLOSE})('|")/, [1, :rquote2]],
# Special case for e.g. "<i>Custer</i>'s Last Stand."
[/("|')(\s|s\b|$)/, [:rquote1, 2]],
# Any remaining single quotes should be opening ones:
[/(.?)'/m, [1, :lsquo]],
[/(.?)"/m, [1, :ldquo]],
] #'"
# Maps a pair of rule symbol and matched quote character to the name of the resulting quote.
SQ_SUBSTS = {
[:rquote1, '"'] => :rdquo,
[:rquote1, "'"] => :rsquo,
[:rquote2, '"'] => :rdquo,
[:rquote2, "'"] => :rsquo,
[:lquote1, '"'] => :ldquo,
[:lquote1, "'"] => :lsquo,
[:lquote2, '"'] => :ldquo,
[:lquote2, "'"] => :lsquo,
}
# A single or double quote character, optionally preceded by one character that is not a backslash.
SMART_QUOTES_RE = /[^\\]?["']/
# Parse the smart quotes at current location.
#
# The first entry of SQ_RULES whose regexp matches at the scanner position is applied: for every
# part of its output list either the text of a capture group is added via add_text or a
# :smart_quote element is appended to the tree. Returns the output list of the applied rule.
def parse_smart_quotes
  _rule, parts = SQ_RULES.find { |pattern, _| @src.scan(pattern) }
  parts.each do |part|
    case part
    when Integer
      # plain text taken from the capture group with this number
      add_text(@src[part].to_s)
    else
      # the trailing digit of the symbol selects the capture group with the quote character;
      # symbols without an entry in SQ_SUBSTS are used as they are
      group = part.to_s[-1, 1].to_i
      quote = SQ_SUBSTS[[part, @src[group]]] || part
      @tree.children << Element.new(:smart_quote, quote)
    end
  end
end
# define_parser is declared outside this file; presumably it registers parse_smart_quotes under
# the name :smart_quotes with SMART_QUOTES_RE as its pattern, the third argument being the string
# form of a pattern for the start of the element -- confirm there.
define_parser(:smart_quotes, SMART_QUOTES_RE, '[^\\\\]?["\']')
end
end
end

View File

@@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/parser/kramdown/blank_line'
require 'kramdown/parser/kramdown/eob'
require 'kramdown/parser/kramdown/horizontal_rule'
module Kramdown
module Parser
class Kramdown
# A separator line: the OPT_SPACE prefix, a "|" or "+" and then a run starting with a dash
# (optionally preceded by a space and/or a colon) that may contain "+", "|", ":", spaces and
# dashes. Capture 1 is everything after the first character except trailing whitespace.
TABLE_SEP_LINE = /^#{OPT_SPACE}(?:\||\+)([ ]?:?-[+|: -]*)[ \t]*\n/
# The part of a separator line describing one column; the captures hold the optional colon
# before and after the dashes.
TABLE_HSEP_ALIGN = /[ ]?(:?)-+(:?)[ ]?/
# A footer separator line: like a separator line but built from equal signs instead of dashes.
TABLE_FSEP_LINE = /^#{OPT_SPACE}(\||\+)[ ]?:?=[+|: =]*[ \t]*\n/
# A table row: the OPT_SPACE prefix, a "|" and the row content (capture 1) without trailing
# whitespace.
TABLE_ROW_LINE = /^#{OPT_SPACE}\|(.*?)[ \t]*\n/
# The start of a table: the OPT_SPACE prefix and a "|" that is followed by a dash or by anything
# but an equal sign.
TABLE_START = /^#{OPT_SPACE}\|(?:-|(?!=))/
# Parse the table at the current location.
#
# Rows are collected line by line and grouped into :thead, :tbody and :tfoot elements according
# to the separator lines found. If no :tbody results, a warning is issued, the scanner position
# is restored and +false+ is returned. Otherwise the :table element is appended to the tree and
# +true+ is returned.
def parse_table
orig_pos = @src.pos
table = new_block_el(:table, nil, :alignment => [])
# skip a separator line at the very top of the table
@src.scan(TABLE_SEP_LINE)
rows = []
has_footer = false
columns = 0
# Moves the rows collected so far into a new container element of the given type and starts a
# new row collection. Nothing happens for a :tbody once a footer separator has been seen,
# unless +force+ is set.
add_container = lambda do |type, force|
if force || type != :tbody || !has_footer
cont = Element.new(type)
cont.children, rows = rows, []
table.children << cont
end
end
while !@src.eos?
# note: a separator line is consumed here even when no rows have been collected yet
if @src.scan(TABLE_SEP_LINE) && !rows.empty?
if table.options[:alignment].empty? && !has_footer
# the first separator line after some rows turns these rows into the header and defines the
# column alignments: no colon -> :default, leading colon only -> :left, trailing colon
# only -> :right, both colons -> :center
add_container.call(:thead, false)
table.options[:alignment] = @src[1].scan(TABLE_HSEP_ALIGN).map do |left, right|
(left.empty? && right.empty? && :default) || (right.empty? && :left) || (left.empty? && :right) || :center
end
else # treat as normal separator line
add_container.call(:tbody, false)
end
elsif @src.scan(TABLE_FSEP_LINE)
# everything collected up to the footer separator belongs to a body
add_container.call(:tbody, true) if !rows.empty?
has_footer = true
elsif @src.scan(TABLE_ROW_LINE)
trow = Element.new(:tr)
# the appended space makes split keep an empty cell after a trailing pipe (removed again below)
cells = (@src[1] + ' ').split(/\|/)
# re-join cells that were split at an escaped pipe, i.e. a pipe preceded by an odd number of
# backslashes; the escaping backslash itself is removed
i = 0
while i < cells.length - 1
backslashes = cells[i].scan(/\\+$/).first
if backslashes && backslashes.length % 2 == 1
cells[i] = cells[i].chop + '|' + cells[i+1]
cells.delete_at(i+1)
else
i += 1
end
end
cells.pop if cells.last.strip.empty?
cells.each do |cell_text|
tcell = Element.new(:td)
tcell.children << Element.new(:raw_text, cell_text.strip)
trow.children << tcell
end
columns = [columns, cells.length].max
rows << trow
else
break
end
end
# the remaining rows form the footer if a footer separator was seen, a body otherwise
add_container.call(has_footer ? :tfoot : :tbody, false) if !rows.empty?
if !table.children.any? {|c| c.type == :tbody}
warning("Found table without body - ignoring it")
@src.pos = orig_pos
return false
end
# adjust all table rows to have equal number of columns, same for alignment defs
table.children.each do |kind|
kind.children.each do |row|
(columns - row.children.length).times do
row.children << Element.new(:td)
end
# cells of header rows are header cells
row.children.each {|el| el.type = :th} if kind.type == :thead
end
end
if table.options[:alignment].length > columns
table.options[:alignment] = table.options[:alignment][0...columns]
else
table.options[:alignment] += [:default] * (columns - table.options[:alignment].length)
end
@tree.children << table
true
end
# define_parser is declared outside this file; presumably it registers parse_table under the name
# :table with TABLE_START as its start pattern -- confirm there.
define_parser(:table, TABLE_START)
end
end
end

View File

@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
module Kramdown
module Parser
class Kramdown
# Pairs of source text and replacement. Longer texts are listed before their prefixes ('---'
# before '--', '<< ' before '<<') because the order is kept in the alternation of
# TYPOGRAPHIC_SYMS_RE. A Symbol replacement names a typographic symbol; the String replacements
# belong to the backslash-escaped forms.
TYPOGRAPHIC_SYMS = [['---', :mdash], ['--', :ndash], ['...', :hellip],
['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
['<< ', :laquo_space], [' >>', :raquo_space],
['<<', :laquo], ['>>', :raquo]]
# Lookup table from source text to replacement.
TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
# Matches any of the source texts.
TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
# Parse the typographic symbols at the current location.
#
# Consumes the current match of the scanner and appends its replacement to the tree: one
# :typographic_sym element if TYPOGRAPHIC_SYMS_SUBST maps the matched text to a Symbol,
# otherwise two :entity elements ('lt' for the text '\<<', 'gt' in all other cases). Returns the
# children array of the tree.
def parse_typographic_syms
  symbol_text = @src.matched
  @src.pos += @src.matched_size
  replacement = TYPOGRAPHIC_SYMS_SUBST[symbol_text]
  if replacement.kind_of?(Symbol)
    return @tree.children << Element.new(:typographic_sym, replacement)
  end

  entity_name = (symbol_text == '\\<<' ? 'lt' : 'gt')
  2.times do
    @tree.children << Element.new(:entity, ::Kramdown::Utils::Entities.entity(entity_name))
  end
  @tree.children
end
# define_parser is declared outside this file; presumably it registers parse_typographic_syms
# under the name :typographic_syms with TYPOGRAPHIC_SYMS_RE as its pattern, the third argument
# being the string form of a pattern for the start of the element -- confirm there.
define_parser(:typographic_syms, TYPOGRAPHIC_SYMS_RE, '--|\\.\\.\\.|(?:\\\\| )?(?:<<|>>)')
end
end
end