--[[# first-line-indent.lua – First line indentation filter Copyright: © 2021–2023 Contributors License: MIT – see LICENSE for details @TODO latex_quote should use options.size (or better, a specific option) @TODO option for leaving indents after headings (French style) @TODO smart setting of the post-heading style based on `lang` @TODO option to leave indent at the beginning of the document ]] PANDOC_VERSION:must_be_at_least '2.17' stringify = pandoc.utils.stringify equals = pandoc.utils.equals pandoctype = pandoc.utils.type -- # Options ---@class Options Options map with default values. ---@field format string|nil output format (currently: 'html' or 'latex') ---@field indent boolean whether to use first line indentation globally ---@field set_metadata_variable boolean whether to set the `indent` -- metadata variable. ---@field set_header_includes boolean whether to provide formatting code in -- header-includes. ---@field auto_remove_indents boolean whether to automatically remove -- indents after specific block types. ---@field remove_after table list of strings, Pandoc AST block types -- after which first-line indents should be automatically removed. ---@field remove_after_class table list of strings, classes of elements -- after which first-line indents should be automatically removed. ---@field dont_remove_after_class table list of strings, classes of elements -- after which first-line indents should not be removed. Prevails -- over remove_after. ---@field size string|nil a CSS / LaTeX specification of the first line -- indent length ---@field recursive table Pandoc Block types to --- which the filter is recursively applied, with options map. --- The option `dont_indent_first` controls whether indentation --- is removed on the first paragraph. local Options = { format = nil, indent = true, set_metadata_variable = true, set_header_includes = true, auto_remove = true, remove_after = pandoc.List({ 'BlockQuote', 'BulletList', 'CodeBlock', 'DefinitionList', 'HorizontalRule', 'OrderedList', }), remove_after_class = pandoc.List({ 'statement', }), dont_remove_after_class = pandoc.List:new(), size = nil, -- default let LaTeX decide size_default = '1.5em', -- default value for HTML recursive = { Div = {dont_indent_first = false}, BlockQuote = {dont_indent_first = true}, } } -- # Filter global variables ---@class code map pandoc objects for indent/noindent Raw code. local code = { tex = { indent = pandoc.RawInline('tex', '\\indent '), noindent = pandoc.RawInline('tex', '\\noindent '), }, latex = { indent = pandoc.RawInline('latex', '\\indent '), noindent = pandoc.RawInline('latex', '\\noindent '), }, html = { indent = pandoc.RawBlock('html', '
'), noindent = pandoc.RawBlock('html', '
'), } } ---LATEX_QUOTE_ENV: LaTeX's definition of the quote environement ---used to define HeaderIncludes. ---a \setlength{\parindent}{} will be appended ---@type string local LATEX_QUOTE_ENV = [[\makeatletter \renewenvironment{quote} {\list{}{\listparindent 1.5em% \itemindent \listparindent \rightmargin \leftmargin \parsep \z@ \@plus \p@}% \item\noindent\relax} {\endlist} \makeatother ]] ---@class HeaderIncludes map of functions to produce ---header-includes code given a size parameter (string|nil), --- either for global or for local indentation markup. --- optionally wrap the constructed global header markup (e.g. " end, latex = function(str) return str end, }, loc = { html = function(size) size = size or Options.size_default local code = [[ div.no-first-line-indent-after + p { text-indent: 0; } div.first-line-indent-after + p { text-indent: SIZE; } ]] return code:gsub("SIZE", size) end, latex = function(_) return '' end, } } -- # encapsulate Quarto/Pandoc variants ---format_match: whether format matches a string pattern ---ex: format_match('html5'), format_match('html*') ---in Quarto we try removing non-alphabetical chars ---@param pattern string ---@return boolean local function format_match(pattern) return quarto and (quarto.doc.is_format(pattern) or quarto.doc.is_format(pattern:gsub('%A','')) ) or FORMAT:match(pattern) end ---add_header_includes: add a block to the document's header-includes ---meta-data field. ---@param meta pandoc.Meta the document's metadata block ---@param blocks pandoc.Blocks list of Pandoc block elements (e.g. RawBlock or Para) --- to be added to the header-includes of meta ---@return pandoc.Meta meta the modified metadata block local function add_header_includes(meta, blocks) -- Pandoc local function pandoc_add_headinc(meta,blocks) local header_includes = pandoc.MetaList( { pandoc.MetaBlocks(blocks) }) -- add any exisiting meta['header-includes'] -- it can be MetaInlines, MetaBlocks or MetaList if meta['header-includes'] then if pandoctype(meta['header-includes']) == 'List' then header_includes:extend(meta['header-includes']) else header_includes:insert(meta['header-includes']) end end meta['header-includes'] = header_includes return meta end -- Quarto local function quarto_add_headinc(blocks) quarto.doc.include_text('in-header', stringify(blocks)) end return quarto and quarto_add_headinc(blocks) or pandoc_add_headinc(meta,blocks) end -- # Helper functions -- ensure_list: turns Inlines and Blocks meta values into list local function ensure_list(elem) if elem and (pandoctype(elem) == 'Inlines' or pandoctype(elem) == 'Blocks') then elem = pandoc.List:new(elem) end return elem end --- classes_include: check if one of an element's class is in a given -- list. Returns true if match, nil if no match or the element doesn't -- have classes. ---@param elem table pandoc AST element ---@param classes table pandoc List of strings local function classes_include(elem,classes) if elem.classes then for _,class in ipairs(classes) do if elem.classes:includes(class) then return true end end end end --- is_indent_cmd: check if an element is a LaTeX indent command ---@param elem pandoc.Inline ---@return string|nil 'indent', 'noindent' or nil -- local function is_indent_cmd(elem) -- return (equals(elem, code.latex.indent) -- or equals(elem, code.tex.indent)) and 'indent' -- or (equals(elem, code.latex.noindent) -- or equals(elem, code.tex.noindent)) and 'noindent' -- or nil -- end local function is_indent_cmd(elem) return elem.text and ( elem.text:match('^%s*\\indent%s*$') and 'indent' or elem.text:match('^%s*\\noindent%s*$') and 'noindent' ) or nil end -- # Filter functions --- Add format-specific explicit indent markup to a paragraph. --- Returns a list of blocks containing a single paragraph --- or a rawblock followed by a paragraph, depending on format. ---@param type string 'indent' or 'noindent', type of markup to add ---@param elem pandoc.Para ---@return pandoc.Blocks local function indent_markup(type, elem) local result = pandoc.List:new() if not (type == 'indent' or type == 'noindent') then result:insert(elem) elseif format_match('latex') then -- in LaTeX, replace any `\indent` or `\noindent` -- at the beginning of the paragraph with -- with the one corresponding to `type` if elem.content[1] and is_indent_cmd(elem.content[1]) then elem.content[1] = code.tex[type] else elem.content:insert(1, code.tex[type]) end result:insert(elem) elseif format_match('html') then result:extend({ code.html[type], elem }) end return result end --- process_blocks: process indentations in a list of blocks. -- Adds output code for explicitly specified first-line indents, -- automatically removes first-line indents after blocks of the -- designed types unless otherwise specified. ---@param blocks pandoc.Blocks element (list of blocks) ---@param dont_indent_first boolean whether to indent the first paragraph local function process_blocks(blocks, dont_indent_first) dont_indent_first = dont_indent_first or false -- tag for the first element local is_first_block = true -- tags the doc's first element -- tag to trigger indentation auto-removal on the next element local dont_indent_next_block = false local result = pandoc.List:new() for _,elem in pairs(blocks) do -- Paragraphs: if they have explicit LaTeX indent markup -- reproduce it in the output format, otherwise -- remove indentation if needed, provided `auto_remove` is on. if elem.t == "Para" then if elem.content[1] and is_indent_cmd(elem.content[1]) then -- 'indent' or 'noindent' ? local type = is_indent_cmd(elem.content[1]) result:extend(indent_markup(type, elem)) elseif is_first_block and dont_indent_first then result:extend(indent_markup('noindent', elem)) elseif dont_indent_next_block and Options.auto_remove then result:extend(indent_markup('noindent', elem)) else result:insert(elem) end dont_indent_next_block = false -- Non-Paragraphs: check first whether it's an element after -- which indentation must be removed. Next insert it, applying -- this function recursively within the element if needed. else if Options.auto_remove then if Options.remove_after:includes(elem.t) and not classes_include(elem, Options.dont_remove_after_class) then dont_indent_next_block = true elseif elem.classes and classes_include(elem, Options.remove_after_class) then dont_indent_next_block = true else dont_indent_next_block = false end end -- recursively process the element if needed if Options.recursive[elem.t] then local dif = Options.recursive[elem.t].dont_indent_first elem.content = process_blocks(elem.content, dif) end -- insert result:insert(elem) end -- ensure `is_first_block` turns to false -- even if the first block wasn't a paragraph -- or if it had explicit indent marking is_first_block = false end return result end --- process_doc: Process indents in the document's body text. -- Adds output code for explicitly specified first-line indents, -- automatically removes first-line indents after blocks of the -- designed types unless otherwise specified. local function process_doc(doc) local dont_indent_first = false -- if no output format, do nothing if not Options.format then return end -- if the doc has a title, do not indent first paragraph if doc.meta.title then dont_indent_first = true end doc.blocks = process_blocks(doc.blocks, dont_indent_first) return doc end --- read_user_options: read user options from meta element. -- in Quarto options may be under format/pdf or format/html -- the latter override root ones. local function read_user_options(meta) local user_options = {} if meta.indent == false then Options.indent = false end if meta['first-line-indent'] then user_options = meta['first-line-indent'] end local formats = {'pdf', 'html', 'latex'} if meta.format then for format in ipairs(formats) do if format_match(format) and meta.format[format] then for k,v in meta.format[format] do user_options[k] = v end end end end if user_options['set-metadata-variable'] == false then Options.set_metadata_variable = false end if user_options['set-header-includes'] == false then Options.set_header_includes = false end -- size -- @todo using stringify means that LaTeX commands in -- size are erased. But it ensures that the filter gets -- a string. Improvement: check that we have a string -- and throw a warning otherwise if user_options.size and pandoctype(user_options.size == 'Inlines') then Options.size = stringify(user_options.size) end if user_options['auto-remove'] == false then Options.auto_remove = false end -- autoremove elements and classes -- for elements we only need a whitelist, remove_after -- for classes we need both a whitelist (remove_after_class) -- and a blacklist (dont_remove_after_class). -- first insert user values in `remove_after`, `remove_after_class` -- and `dont_remove_after_class`. for optname, metakey in pairs({ remove_after = 'remove-after', remove_after_class = 'remove-after-class', dont_remove_after_class = 'dont-remove-after-class', }) do local user_value = ensure_list(user_options[metakey]) if user_value and pandoctype(user_value) == 'List' then for _,item in ipairs(user_value) do Options[optname]:insert(stringify(item)) end end end -- then remove blacklisted entries from `remove_after` -- and `remove_after_class`. for optname, metakey in pairs({ remove_after = 'dont-remove-after', remove_after_class = 'dont-remove-after-class' }) do local user_value = ensure_list(user_options[metakey]) if user_value and pandoctype(user_value) == 'List' then -- stringify the list for i,v in ipairs(user_value) do user_value[i] = stringify(v) end -- filter to that returns true iff an item isn't blacklisted local predicate = function (str) return not(user_value:includes(str)) end -- apply the filter to the whitelist Options[optname] = Options[optname]:filter(predicate) end end end --- set_meta: insert options in doc's meta --- Sets `indent` and extends `header-includes` if needed. ---@param meta pandoc.Meta ---@return pandoc.Meta|nil meta nil if no changes local function set_meta(meta) local changes = false -- only return if changes are made local header_code = nil local format = Options.format -- set the `indent` metadata variable unless otherwise specified or -- already set to false if Options.set_metadata_variable and not(meta.indent == false) then meta.indent = true changes = true end -- set the `header-includes` metadata variable if Options.set_header_includes and Options.indent then -- do we apply first line indentation globally? if Options.indent then header_code = HeaderIncludes.glob[format](Options.size) end -- provide local explicit indentation styles header_code = header_code .. HeaderIncludes.loc[format](Options.size) -- wrap the header if needed header_code = HeaderIncludes.wrap[format](header_code) -- insert if not empty if header_code ~= '' then add_header_includes(meta, { pandoc.RawBlock(format, header_code)}) changes = true end end return changes and meta or nil end --- process_metadata: process user options. -- read user options, set the `indent` metadata variable, -- add formatting code to `header-includes`. local function process_metadata(meta) local changes = false -- only return if changes are made local header_code = nil local format = format_match('html') and 'html' or (format_match('latex') and 'latex') if not format then return nil else Options.format = format end read_user_options(meta) -- places values in global `options` return set_meta(meta) end --- Main code -- Returns the filters in the desired order of execution return { { Meta = process_metadata }, { Pandoc = process_doc } }