Module:Clade/converter

From OODA WIKI

Documentation for this module may be created at Module:Clade/converter/doc

--require('strict')   -- comment out until clade also uses noglobals

-- Export table: every public function is attached to p, and p is returned at the end of the module.
local p = {}

-- Arguments of the parent frame of the current frame, captured once when the
-- module is loaded and shared by the functions below.
-- NOTE(review): if getParent() returns nil (no parent frame), this line raises
-- while the module is loading — confirm every call path has a parent frame.
local pargs = mw.getCurrentFrame():getParent().args

--[[ =================== parser for conversion to clade structure =============================
    
    Function p.newickConverter()
        convert Newick strings to clade format
		Usage: {{#invoke:Module:Clade/converter|newickConverter|newickstring={{{NEWICK_STRING}}} }}

    Function p.listConverter()
        convert wikitext-like lists to clade format
        use @ instead of * in wikitext to avoid processing
    	Usage: {{#invoke:Module:Clade/converter|listConverter|list={{{LIST_STRING}}} }}
]]
-- Entry point that picks the converter from the arguments supplied:
--   * a Newick string ('newickstring' on the invoke, or 'newick'/'newickstring'
--     on the parent frame) selects p.newickConverter;
--   * otherwise a 'list' argument (invoke or parent frame) selects p.listConverter.
-- Returns the chosen converter's result, or nothing when neither input is present.
function p.cladeConverter(frame)
	local newickInput = frame.args['newickstring'] or pargs['newick'] or pargs['newickstring']
	if newickInput then
		return p.newickConverter(frame)
	end

	local listInput = frame.args['list'] or pargs['list']
	if listInput then
		return p.listConverter(frame)
	end
end

--[[ =================== Newick to clade parser function =============================

	Function to convert Newick strings to clade format

	Usage: {{#invoke:Module:Clade/converter|newickConverter|newickstring={{{NEWICK_STRING}}} }}
]]
-- Convert a Newick string into {{clade}} template wikitext.
--
-- The string is taken from the invoke argument 'newickstring', falling back to
-- the parent-frame arguments 'newick' and 'newickstring', and is first passed
-- through Module:Clade's processNewickString.
--
-- Returns the bare clade wikitext when the parent-frame argument 'option' is
-- 'tree'; otherwise returns a <div> showing the processed Newick string and
-- the generated clade structure, each inside <pre> tags.
function p.newickConverter(frame)
	local newickString = frame.args['newickstring'] or pargs['newick'] or pargs['newickstring']
	newickString = require('Module:Clade').processNewickString(newickString, "")

	-- the root is level 1 and is always the first (only) child
	local rootLevel, rootChild = 1, 1
	local cladeString = '{{clade'
		.. p.newickParseLevel(newickString, rootLevel, rootChild)
		.. '\r}}'

	local option = mw.getCurrentFrame():getParent().args['option'] or ''
	if option == 'tree' then
		-- hand back the clade wikitext itself
		return cladeString
	end

	-- otherwise display both the input and the generated structure as text
	local newickDisplay = '<div>Modified Newick string:' .. '<pre>' .. newickString .. '</pre>'
	local cladeDisplay = 'Output of clade template structure:' .. '<pre>' .. cladeString .. '</pre></div>'
	return newickDisplay .. cladeDisplay
end

-- Split the content of a node's parentheses into its direct children.
-- Commas inside a balanced "( ... )" group belong to a nested subtree and do
-- not split; whitespace following those nested commas is stripped. Working on
-- the balanced groups directly avoids substituting a placeholder for nested
-- commas, which would corrupt any label that happens to contain the placeholder.
local function splitNewickChildren(innerTerm)
	local children = {}
	local current = ""
	local pos = 1
	while true do
		local groupStart, groupEnd = string.find(innerTerm, "%b()", pos)
		-- text before the next balanced group (or the rest of the string when there is none)
		local plain = string.sub(innerTerm, pos, (groupStart or 0) - 1)
		local pieces = mw.text.split(plain, ",", true)
		current = current .. pieces[1]
		for n = 2, #pieces do
			-- each further piece means a top-level comma was crossed
			children[#children + 1] = current
			current = pieces[n]
		end
		if not groupStart then
			break
		end
		-- keep the nested group intact, only tidying spaces after its commas
		local group = string.gsub(string.sub(innerTerm, groupStart, groupEnd), ",%s*", ",")
		current = current .. group
		pos = groupEnd + 1
	end
	children[#children + 1] = current
	return children
end

--[[ Parse one level of Newick string
     This function receives a Newick string, which has two components
      1. the right hand term is a clade label: |labelN=labelname
      2. the left hand term in parentheses has comma-delimited child nodes, each of which can be
           i.  a taxon name which just needs:  |N=leafname 
           ii. a Newick string which needs further processing through recursion

     A string without any parenthesised term is written as a single leaf (|N=string).

     @param newickString  Newick (sub)string for this node, e.g. "(A,(B,C)x)y"
     @param levelNumber   depth of this node; only used to build the indentation
     @param childNumber   position of this node among its siblings (the N in |N= and |labelN=)
     @return              clade template parameters for this node and all of its descendants
]]
function p.newickParseLevel(newickString,levelNumber,childNumber)

	local indent = p.getIndent(levelNumber)

	-- locate the outer parenthesised term (first "(" to last ")")
	local openPos, closePos = string.find(newickString, '%(.*%)')
	if not openPos then
		-- nothing to descend into: emit the string as a leaf instead of failing on nil positions
		return indent .. '|' .. childNumber .. '=' .. newickString
	end
	local innerTerm = string.sub(newickString, openPos + 1, closePos - 1)   -- content in parentheses
	local outerTerm = string.gsub(newickString, "%b()", "")                 -- what is left is the label

	local output = {}
	output[#output + 1] = indent .. '|label' .. childNumber .. '=' .. outerTerm
	output[#output + 1] = indent .. '|' .. childNumber .. '=' .. '{{clade'

	local childLevel = levelNumber + 1
	local childIndent = p.getIndent(childLevel)

	for i, child in ipairs(splitNewickChildren(innerTerm)) do
		if string.find(child, '%(.*%)') then
			-- the child is itself a subtree: recurse, numbering it by its position here
			output[#output + 1] = p.newickParseLevel(child, childLevel, i)
		else
			output[#output + 1] = childIndent .. '|' .. i .. '=' .. child
		end
	end

	-- close the {{clade opened above
	output[#output + 1] = childIndent .. '}}'
	return table.concat(output)
end

--[[ Build the prefix written in front of each generated clade parameter.

     The prefix is a carriage return ("\r", which this module uses as its line
     separator), then one space per unit of the optional 'indent' argument
     (an extra indent to make aligning compound trees easier), then three
     spaces for every level below the first.

     @param levelNumber  depth of the node being written (1 = no level indent)
     @return             the prefix string
]]
function p.getIndent(levelNumber)
	-- 'indent' is read from the parent frame first, then from the current frame.
	-- tonumber() yields nil for an empty or non-numeric value (e.g. "|indent="),
	-- which used to raise "attempt to compare nil with number"; treat it as 0.
	local extraIndent = tonumber(pargs['indent'] or mw.getCurrentFrame().args['indent']) or 0

	-- math.ceil reproduces the count of the former decrement-by-one loops for
	-- fractional values; math.max keeps the repeat counts from going negative.
	local extraSpaces = math.max(0, math.ceil(extraIndent))
	local levelSteps = math.max(0, math.ceil(levelNumber - 1))

	return "\r" .. string.rep(" ", extraSpaces) .. string.rep("   ", levelSteps)
end


--[[ =================== experimental list to clade parser function =============================

    Function to convert wikitext-like lists to clade format
      - use @ instead of * in wikitext to avoid processing

    Usage: {{#invoke:Module:Clade/converter|listConverter|list={{{LIST_STRING}}} }}
]]

-- Convert a wikitext-like list (using @ as the bullet character) into
-- {{clade}} template wikitext.
--
-- The list is taken from the invoke argument 'list', falling back to the
-- parent-frame argument 'list'.
--
-- Returns the bare clade wikitext when the parent-frame argument 'option' is
-- 'tree'; otherwise returns the clade wikitext wrapped in <pre> tags.
function p.listConverter(frame)
	local listString = frame.args['list'] or mw.getCurrentFrame():getParent().args['list']

	-- the root is level 1 and is always the first (only) child
	local rootLevel, rootChild = 1, 1

	-- p.listParseLevel appends the closing '}}' itself, so none is added here
	local cladeString = p.getIndent(rootLevel) .. '{{clade'
		.. p.listParseLevel(listString, rootLevel, rootChild)

	local option = mw.getCurrentFrame():getParent().args['option'] or ''
	if option == 'tree' then
		-- hand back the clade wikitext itself
		return cladeString
	end

	-- otherwise display the generated structure as text
	return '<pre>' .. cladeString .. '</pre>'
end

-- Parse one level of an @-bulleted list into clade template parameters.
--
-- One leading @ is removed from every line. A line that still begins with @
-- afterwards belongs to the subtree of the nearest preceding line that does
-- not; that preceding line becomes the subtree's label and the @ lines are
-- rejoined and parsed recursively one level deeper.
--
--   listString   newline-separated list text for this level
--   levelNumber  depth of this level; only used to build the indentation
--   childNumber  accepted for symmetry with the Newick parser; not read here
--
-- Returns the parameters for this level followed by the closing '}}'.
function p.listParseLevel(listString,levelNumber,childNumber)

	local indent = p.getIndent(levelNumber)
	local nextLevel = levelNumber + 1

	-- drop one leading @ from each line
	local lines = mw.text.split(listString, "\n")
	for idx, line in ipairs(lines) do
		lines[idx] = (line:gsub("^@", ""))
	end

	-- true when the line exists and still begins with @ (i.e. sits deeper than this level)
	local function isDeeper(line)
		return line ~= nil and string.match(line, "^@", 1) ~= nil
	end

	local output = {}
	local child = 1
	local idx = 1
	while lines[idx] do
		if isDeeper(lines[idx + 1]) then
			-- this line labels a subtree made of the run of @ lines that follows
			local label = lines[idx]
			idx = idx + 1
			local subtree = { lines[idx] }
			while isDeeper(lines[idx + 1]) do
				idx = idx + 1
				subtree[#subtree + 1] = lines[idx]
			end
			output[#output + 1] = indent .. '|label' .. child .. '=' .. label
			output[#output + 1] = indent .. '|' .. child .. '=' .. '{{clade'
				.. p.listParseLevel(table.concat(subtree, "\n"), nextLevel, idx)
		else
			-- simple leaf
			output[#output + 1] = indent .. '|' .. child .. '=' .. lines[idx]
		end
		idx = idx + 1
		child = child + 1
	end

	-- close the {{clade that the caller opened
	output[#output + 1] = indent .. '}}'
	return table.concat(output)
end
return p