Spaces:
Running
Running
--[[# first-line-indent.lua – First line indentation filter

Copyright: © 2021–2023 Contributors
License: MIT – see LICENSE for details

@TODO latex_quote should use options.size (or better, a specific option)
@TODO option for leaving indents after headings (French style)
@TODO smart setting of the post-heading style based on `lang`
@TODO option to leave indent at the beginning of the document
]]
-- Stop with an error if the running Pandoc is older than 2.17.
PANDOC_VERSION:must_be_at_least '2.17'

-- File-local aliases for the pandoc.utils helpers used by the filter.
-- They are declared `local` so that they do not leak into the global
-- environment.
local stringify = pandoc.utils.stringify
local equals = pandoc.utils.equals
local pandoctype = pandoc.utils.type
-- # Options | |
---@class Options Filter settings, initialised with their default values.
---@field format string|nil output format ('html' or 'latex'); stays nil
--    until the metadata pass has detected a supported format.
---@field indent boolean whether first-line indentation is applied globally.
---@field set_metadata_variable boolean whether the filter sets the `indent`
--    metadata variable.
---@field set_header_includes boolean whether the filter adds formatting
--    code to header-includes.
---@field auto_remove boolean whether indents are automatically removed
--    after specific block types.
---@field remove_after table list of strings, Pandoc AST block types
--    after which first-line indents are automatically removed.
---@field remove_after_class table list of strings, classes of elements
--    after which first-line indents are automatically removed.
---@field dont_remove_after_class table list of strings, classes of
--    elements after which first-line indents are not removed. Prevails
--    over remove_after.
---@field size string|nil a CSS / LaTeX specification of the first-line
--    indent length.
---@field size_default string indent length used for HTML when `size`
--    is nil.
---@field recursive table<string, table> Pandoc Block types inside which
--    the filter is applied recursively, each with an options map. The
--    option `dont_indent_first` controls whether indentation is removed
--    on the first paragraph of the nested content.
local Options = {
  -- detected at run time
  format = nil,

  -- switches
  indent = true,
  auto_remove = true,
  set_metadata_variable = true,
  set_header_includes = true,

  -- indent length
  size = nil, -- default let LaTeX decide
  size_default = '1.5em', -- default value for HTML

  -- automatic indent removal
  remove_after = pandoc.List {
    'BlockQuote',
    'BulletList',
    'CodeBlock',
    'DefinitionList',
    'HorizontalRule',
    'OrderedList',
  },
  remove_after_class = pandoc.List { 'statement' },
  dont_remove_after_class = pandoc.List {},

  -- nested containers
  recursive = {
    BlockQuote = { dont_indent_first = true },
    Div = { dont_indent_first = false },
  },
}
-- # Filter global variables | |
---@class code map of ready-made pandoc Raw elements carrying the
--- indent / noindent markup, keyed first by raw format and then by
--- 'indent' or 'noindent'.
local code = {
  -- HTML uses marker blocks; the styles target the paragraph that
  -- follows them.
  html = {
    indent = pandoc.RawBlock('html',
      '<div class="first-line-indent-after"></div>'),
    noindent = pandoc.RawBlock('html',
      '<div class="no-first-line-indent-after"></div>'),
  },
}
-- The 'tex' and 'latex' variants only differ by their raw format name.
for _, raw_format in ipairs({ 'tex', 'latex' }) do
  code[raw_format] = {
    indent = pandoc.RawInline(raw_format, '\\indent '),
    noindent = pandoc.RawInline(raw_format, '\\noindent '),
  }
end
---LATEX_QUOTE_ENV: LaTeX's definition of the quote environment,
---used to build the LaTeX header-includes.
---A \setlength{\parindent}{<size>} is appended to it when a size is set.
---Note that \listparindent is hard-coded to 1.5em here.
---@type string
local LATEX_QUOTE_ENV = [[\makeatletter
\renewenvironment{quote}
    {\list{}{\listparindent 1.5em%
        \itemindent    \listparindent
        \rightmargin   \leftmargin
        \parsep        \z@ \@plus \p@}%
        \item\noindent\relax}
    {\endlist}
\makeatother
]]
--- fill_size: substitute every `SIZE` placeholder of a template.
-- A function replacement is used so that a `%` in the size (e.g. `5%`)
-- is inserted literally instead of being read as a gsub escape, and the
-- call is parenthesised so that only the string is returned.
---@param template string text containing `SIZE` placeholders
---@param size string value to insert
---@return string
local function fill_size(template, size)
  return (template:gsub('SIZE', function() return size end))
end

---@class HeaderIncludes map of functions producing header-includes code.
--- `glob` and `loc` take a size parameter (string|nil) and return the
--- markup for global, respectively local (explicit), indentation.
--- `wrap` optionally wraps the assembled markup (e.g. in <style> tags).
---   glob = {html : function, latex: function}
---   wrap = {html : function, latex: function}
---   loc = {html : function, latex: function}
local HeaderIncludes = {
  glob = {
    -- CSS indenting every paragraph, except in the title block,
    -- right after a heading and inside list items.
    html = function(size)
      size = size or Options.size_default
      local css = [[
  p {
    text-indent: SIZE;
    /* optionally add `margin: 0;` here to remove paragraph spacing */
  }
  header p {
    text-indent: 0;
    margin: 1em 0;
  }
  :is(h1, h2, h3, h4, h5, h6) + p {
    text-indent: 0;
  }
  li > p, li > div p {
    text-indent: 0;
    margin-bottom: 1rem;
  }
]]
      return fill_size(css, size)
    end,
    -- Quote environment redefinition, followed by \parindent when a
    -- size is given.
    latex = function(size)
      local size_code = size
        and '\\setlength{\\parindent}{' .. size .. '}\n'
        or ''
      return LATEX_QUOTE_ENV .. size_code
    end,
  },
  wrap = {
    html = function(header_str)
      return "<style>\n/* first-line indent styles */\n" .. header_str
        .. "/* end of first-line indent styles */\n</style>"
    end,
    latex = function(str) return str end,
  },
  loc = {
    -- CSS for paragraphs preceded by one of the marker <div>s.
    html = function(size)
      size = size or Options.size_default
      local css = [[
  div.no-first-line-indent-after + p {
    text-indent: 0;
  }
  div.first-line-indent-after + p {
    text-indent: SIZE;
  }
]]
      return fill_size(css, size)
    end,
    -- LaTeX needs no extra header code for local markup.
    latex = function(_) return '' end,
  },
}
-- # encapsulate Quarto/Pandoc variants | |
---format_match: whether the output format matches a string pattern.
---ex: format_match('html5'), format_match('html*')
---In Quarto, `quarto.doc.is_format` is asked first, with the pattern as
---given and then with its non-alphabetical characters removed. If
---neither matches, or outside Quarto, the pattern is matched against
---Pandoc's FORMAT global as a Lua pattern.
---@param pattern string
---@return boolean
local function format_match(pattern)
  if quarto then
    -- a local assignment keeps only gsub's first result, so that the
    -- substitution count is not passed on to is_format
    local letters_only = pattern:gsub('%A', '')
    if quarto.doc.is_format(pattern)
        or quarto.doc.is_format(letters_only) then
      return true
    end
  end
  -- string.match returns the matched text; convert to a real boolean
  return FORMAT:match(pattern) ~= nil
end
---add_header_includes: add blocks to the document's header-includes
---metadata field. The new blocks are placed before any existing
---header-includes content.
---@param meta pandoc.Meta the document's metadata block
---@param blocks pandoc.Blocks list of Pandoc block elements (e.g. RawBlock or Para)
---   to be added to the header-includes of meta
---@return pandoc.Meta meta the modified metadata block
local function add_header_includes(meta, blocks)

  -- Quarto: hand the stringified blocks over to Quarto's include
  -- mechanism.
  -- NOTE(review): this step returns nothing, so the metadata is also
  -- updated below, exactly as outside Quarto. Confirm that registering
  -- the code through both routes is intended.
  if quarto then
    quarto.doc.include_text('in-header', stringify(blocks))
  end

  -- Pandoc: start a fresh list with the new blocks, then append
  -- whatever was already there. An existing value can be MetaInlines,
  -- MetaBlocks or MetaList; only a list is spliced in item by item.
  local merged = pandoc.MetaList({ pandoc.MetaBlocks(blocks) })
  local existing = meta['header-includes']
  if existing then
    if pandoctype(existing) == 'List' then
      merged:extend(existing)
    else
      merged:insert(existing)
    end
  end
  meta['header-includes'] = merged

  return meta
end
-- # Helper functions | |
--- ensure_list: turn an Inlines or Blocks meta value into a pandoc List.
-- Any other value, including nil and false, is returned unchanged.
---@param elem any metadata value
---@return any elem a pandoc List, or the value that was passed in
local function ensure_list(elem)
  if not elem then
    return elem
  end
  local kind = pandoctype(elem)
  if kind == 'Inlines' or kind == 'Blocks' then
    return pandoc.List:new(elem)
  end
  return elem
end
--- classes_include: check whether an element carries at least one of
-- the given classes. Returns true on a match and nothing (nil) when
-- there is no match or the element has no `classes` field.
---@param elem table pandoc AST element
---@param classes table pandoc List of strings
---@return boolean|nil
local function classes_include(elem, classes)
  -- elements without classes can never match
  if not elem.classes then
    return
  end
  for _, candidate in ipairs(classes) do
    local found = elem.classes:includes(candidate)
    if found then
      return true
    end
  end
end
--- is_indent_cmd: check if an element is a LaTeX indent command.
-- Only raw inlines of format 'tex' or 'latex' qualify, so that other
-- inlines that merely contain the text `\indent` (inline code, plain
-- strings) are not mistaken for commands. The raw text is matched
-- with optional surrounding whitespace.
---@param elem pandoc.Inline
---@return string|nil 'indent', 'noindent' or nil
local function is_indent_cmd(elem)
  if not elem or elem.t ~= 'RawInline' then
    return nil
  end
  if elem.format ~= 'tex' and elem.format ~= 'latex' then
    return nil
  end
  local text = elem.text
  if type(text) ~= 'string' then
    return nil
  end
  if text:match('^%s*\\indent%s*$') then
    return 'indent'
  elseif text:match('^%s*\\noindent%s*$') then
    return 'noindent'
  end
  return nil
end
-- # Filter functions | |
--- Add format-specific explicit indent markup to a paragraph.
--- Returns a list of blocks containing a single paragraph
--- or a rawblock followed by a paragraph, depending on format.
--- The paragraph is returned unchanged when `kind` is not a known
--- markup type or when the output format is neither LaTeX nor HTML.
---@param kind string 'indent' or 'noindent', type of markup to add
---@param elem pandoc.Para
---@return pandoc.Blocks
local function indent_markup(kind, elem)
  local result = pandoc.List:new()

  if kind ~= 'indent' and kind ~= 'noindent' then
    result:insert(elem)
  elseif format_match('latex') then
    -- in LaTeX, replace any `\indent` or `\noindent` at the
    -- beginning of the paragraph with the one corresponding
    -- to `kind`, or prepend it if there is none
    local first = elem.content[1]
    if first and is_indent_cmd(first) then
      elem.content[1] = code.tex[kind]
    else
      elem.content:insert(1, code.tex[kind])
    end
    result:insert(elem)
  elseif format_match('html') then
    -- in HTML, a marker block placed before the paragraph
    result:extend({ code.html[kind], elem })
  else
    -- unsupported format: never drop the paragraph
    result:insert(elem)
  end

  return result
end
--- process_blocks: process indentations in a list of blocks.
-- Adds output code for explicitly specified first-line indents,
-- automatically removes first-line indents after blocks of the
-- designated types unless otherwise specified.
---@param blocks pandoc.Blocks element (list of blocks)
---@param dont_indent_first boolean whether to remove the indent of the
--    list's first block when it is a paragraph (defaults to false)
---@return pandoc.List result the processed blocks
local function process_blocks(blocks, dont_indent_first)
  dont_indent_first = dont_indent_first or false
  -- tags the list's first element
  local is_first_block = true
  -- triggers indentation auto-removal on the next element
  local dont_indent_next_block = false
  local result = pandoc.List:new()

  -- ipairs guarantees that blocks are visited in document order
  for _, elem in ipairs(blocks) do

    if elem.t == 'Para' then
      -- Paragraphs: if they have explicit LaTeX indent markup
      -- reproduce it in the output format, otherwise
      -- remove indentation if needed, provided `auto_remove` is on.
      local first = elem.content[1]
      -- 'indent', 'noindent' or nil
      local explicit = first and is_indent_cmd(first)

      if explicit then
        result:extend(indent_markup(explicit, elem))
      elseif is_first_block and dont_indent_first then
        result:extend(indent_markup('noindent', elem))
      elseif dont_indent_next_block and Options.auto_remove then
        result:extend(indent_markup('noindent', elem))
      else
        result:insert(elem)
      end

      dont_indent_next_block = false

    else
      -- Non-Paragraphs: check first whether it's an element after
      -- which indentation must be removed. Next insert it, applying
      -- this function recursively within the element if needed.
      if Options.auto_remove then
        if Options.remove_after:includes(elem.t)
            and not classes_include(elem,
              Options.dont_remove_after_class) then
          dont_indent_next_block = true
        elseif classes_include(elem, Options.remove_after_class) then
          dont_indent_next_block = true
        else
          dont_indent_next_block = false
        end
      end

      -- recursively process the element if needed
      local nested = Options.recursive[elem.t]
      if nested then
        elem.content = process_blocks(elem.content,
          nested.dont_indent_first)
      end

      result:insert(elem)
    end

    -- ensure `is_first_block` turns to false
    -- even if the first block wasn't a paragraph
    -- or if it had explicit indent marking
    is_first_block = false
  end

  return result
end
--- process_doc: Process indents in the document's body text.
-- Does nothing (returns no value) when no output format has been set
-- in the options. Otherwise runs `process_blocks` on the document's
-- blocks, removing the indent of the first paragraph if the document
-- has a title.
---@param doc pandoc.Pandoc
---@return pandoc.Pandoc|nil doc the modified document, if processed
local function process_doc(doc)
  -- no output format: leave the document alone
  if not Options.format then
    return
  end

  -- a titled document starts with an unindented paragraph
  local skip_first_indent = doc.meta.title and true or false

  doc.blocks = process_blocks(doc.blocks, skip_first_indent)
  return doc
end
--- read_user_options: read user options from the metadata and store
-- them in `Options`.
-- Options are read from the `first-line-indent` map. Maps found under
-- `format/pdf`, `format/html` or `format/latex` (Quarto style) are
-- merged on top of it when the output format matches, so they override
-- the root values.
---@param meta pandoc.Meta the document's metadata block
local function read_user_options(meta)
  local user_options = {}

  if meta.indent == false then
    Options.indent = false
  end

  -- Copy the root options instead of aliasing the metadata map, so
  -- that format-specific overrides are not written back into `meta`.
  -- Values that are not maps are ignored.
  local root_options = meta['first-line-indent']
  if pandoctype(root_options) == 'table' then
    for k, v in pairs(root_options) do
      user_options[k] = v
    end
  end

  -- format-specific overrides
  if pandoctype(meta.format) == 'table' then
    for _, format in ipairs({ 'pdf', 'html', 'latex' }) do
      local overrides = meta.format[format]
      if format_match(format) and pandoctype(overrides) == 'table' then
        for k, v in pairs(overrides) do
          user_options[k] = v
        end
      end
    end
  end

  if user_options['set-metadata-variable'] == false then
    Options.set_metadata_variable = false
  end

  if user_options['set-header-includes'] == false then
    Options.set_header_includes = false
  end

  -- size
  -- @todo using stringify means that LaTeX commands in
  -- size are erased. But it ensures that the filter gets
  -- a string. Improvement: throw a warning when the value
  -- is of an unsupported type.
  local size_type = pandoctype(user_options.size)
  if size_type == 'Inlines' or size_type == 'Blocks'
      or size_type == 'string' then
    local size = stringify(user_options.size)
    -- an empty size would produce invalid CSS / LaTeX
    if size ~= '' then
      Options.size = size
    end
  end

  if user_options['auto-remove'] == false then
    Options.auto_remove = false
  end

  -- autoremove elements and classes
  -- for elements we only need a whitelist, remove_after
  -- for classes we need both a whitelist (remove_after_class)
  -- and a blacklist (dont_remove_after_class).

  -- first insert user values in `remove_after`, `remove_after_class`
  -- and `dont_remove_after_class`.
  for optname, metakey in pairs({
    remove_after = 'remove-after',
    remove_after_class = 'remove-after-class',
    dont_remove_after_class = 'dont-remove-after-class',
  }) do
    local user_value = ensure_list(user_options[metakey])
    if user_value and pandoctype(user_value) == 'List' then
      for _, item in ipairs(user_value) do
        Options[optname]:insert(stringify(item))
      end
    end
  end

  -- then remove blacklisted entries from `remove_after`
  -- and `remove_after_class`.
  for optname, metakey in pairs({
    remove_after = 'dont-remove-after',
    remove_after_class = 'dont-remove-after-class',
  }) do
    local user_value = ensure_list(user_options[metakey])
    if user_value and pandoctype(user_value) == 'List' then
      -- stringified copy of the blacklist
      local blacklist = pandoc.List:new()
      for _, item in ipairs(user_value) do
        blacklist:insert(stringify(item))
      end
      -- keep only the whitelist items that aren't blacklisted
      Options[optname] = Options[optname]:filter(function(str)
        return not blacklist:includes(str)
      end)
    end
  end
end
--- set_meta: insert options in doc's meta
--- Sets `indent` and extends `header-includes` if needed.
--- The global indentation code is only added when `Options.indent` is
--- on. The code for explicit local markup is added whenever
--- header-includes are enabled, so that explicit indent markers keep
--- working when global indentation is off.
---@param meta pandoc.Meta
---@return pandoc.Meta|nil meta nil if no changes
local function set_meta(meta)
  local changes = false -- only return meta if changes are made
  local format = Options.format

  -- set the `indent` metadata variable unless otherwise specified or
  -- already set to false
  if Options.set_metadata_variable and meta.indent ~= false then
    meta.indent = true
    changes = true
  end

  -- set the `header-includes` metadata variable
  if Options.set_header_includes then
    local header_code = ''

    -- do we apply first line indentation globally?
    if Options.indent then
      header_code = HeaderIncludes.glob[format](Options.size)
    end

    -- provide local explicit indentation styles
    header_code = header_code .. HeaderIncludes.loc[format](Options.size)

    -- wrap and insert, unless there is nothing to insert
    if header_code ~= '' then
      header_code = HeaderIncludes.wrap[format](header_code)
      add_header_includes(meta, { pandoc.RawBlock(format, header_code) })
      changes = true
    end
  end

  return changes and meta or nil
end
--- process_metadata: process user options.
-- Detects the output format; if it is HTML or LaTeX, reads the user
-- options, sets the `indent` metadata variable and adds formatting
-- code to `header-includes`. Other formats are left untouched.
---@param meta pandoc.Meta
---@return pandoc.Meta|nil meta nil if the metadata is unchanged
local function process_metadata(meta)
  local format = (format_match('html') and 'html')
    or (format_match('latex') and 'latex')
    or nil

  -- unsupported output format: nothing to do
  if not format then
    return nil
  end

  Options.format = format
  read_user_options(meta) -- places values in the file-level `Options`
  return set_meta(meta)
end
--- Main code
-- Returns the filters in the desired order of execution: the metadata
-- pass comes first because it sets the options the document pass reads.
return {
  { Meta = process_metadata },
  { Pandoc = process_doc },
}