// D HTML to CHM converter/generator, by Vladimir Panteleev

import std.stdio;
import std.file;
import std.string;
import std.regexp;

// ********************************************************************

// Returns the smaller of a and b.
int min(int a, int b)
{
	return a < b ? a : b;
}

// Replaces every '/' in s with '\'. The characters are rewritten through
// the slice itself (no copy is made), so the caller's buffer is changed.
void backSlash(string s)	// replace path delimiters in-place
{
	//s=s.dup;
	foreach(inout c;s)
		if(c=='/')
			c='\\';
}

// Returns true when the regular expression `pattern` matches somewhere
// in `line`.
bool match(string line, string pattern)
{
	return std.regexp.find(line, pattern)>=0;
}

// Returns the part of s starting at the first '#' (the '#' included),
// or "" when s contains no '#'.
string getAnchor(string s)
{
	int i = std.string.find(s, '#');
	if(i<0)
		return "";
	else
		return s[i..$];
}

// Returns the part of s before the first '#', or s itself when it
// contains no '#'.
string removeAnchor(string s)
{
	int i = std.string.find(s, '#');
	if(i<0)
		return s;
	else
		return s[0..i];
}

// Resolves `url` relative to the file path `base`.
//
// Both arguments have their '/' separators rewritten to '\' in place.
//  - A url that begins with '#' is appended to the full base path.
//  - Otherwise the file name is dropped from base and every leading
//    "..\" in url removes one more directory level from base.
// Returns "" when url climbs above the top of base.
//
// Every index and slice is guarded by a length check, so an empty url,
// a url shorter than three characters and a base without any '\' are
// handled instead of raising an array-bounds error.
string absoluteUrl(string base, string url)
{
	backSlash(base);
	backSlash(url);

	// fragment-only reference: stays within the base document
	if (url.length>0 && url[0]=='#')
		return base ~ url;

	// cut the file name off, keeping the trailing '\' of the directory
	while(base.length>0 && base[$-1]!='\\')
		base = base[0..$-1];

	while(url.length>=3 && url[0..3]=="..\\")
	{
		url = url[3..$];
		// nothing left to climb out of
		if(base.length==0)
			return "";
		do
		{
			base = base[0..$-1];
			if(base.length==0)
				return "";
		} while(base[$-1]!='\\');
	}
	return base ~ url;
}

// Rewrites a path that starts with "d\" so that it starts with "chm\"
// instead; any other path is returned unchanged.
string movePath(string s)
{
	if(s.length>1 && s[0..2]=="d\\")
		s = "chm" ~ s[1..$];
	return s;
}

// Lower-cases s and removes every whitespace character from it.
string normalize(string s)
{
	s = tolower(s);
	string t;
	foreach(c;s)
		if(!iswhite(c))
			t ~= c;
	return t;
}

// ********************************************************************

// A single link: target url, title and text.
struct Link
{
	string url, title, text;

	// Builds a Link; path separators in url are converted to '\' and
	// all three fields are stripped of surrounding whitespace.
	static Link opCall(string url, string title, string text)
	{
		backSlash(url);
		Link my;
		my.url = strip(url);
		my.title = strip(title);
		my.text = strip(text);
		return my;
	}
}

// A captioned group of links.
struct LinkBlock
{
	Link caption;
	Link[] links;

	// Builds a LinkBlock with the given caption and no links. The
	// caption goes through Link's own constructor so both types
	// normalise their fields the same way.
	static LinkBlock opCall(string url, string title, string text)
	{
		LinkBlock my;
		my.caption = Link(url, title, text);
		return my;
	}
}

// Everything collected about one input file.
class Page
{
	string newFileName;       // output path of the file
	string title;             // page title
	string src;               // raw file contents
	Link[] toctop;            // links collected from the top-of-page TOC
	LinkBlock[] linkBlocks;   // captioned navigation link groups
	bool[string] anchors;     // set of anchors ("#name") present on the page
}

// Where a keyword points inside one file: the anchor and an optional title.
struct KeyLink
{
	string anchor;
	string title;

	// Builds a KeyLink; both fields are stripped of surrounding whitespace.
	static KeyLink opCall(string anchor, string title)
	{
		KeyLink my;
		my.anchor = strip(anchor);
		my.title = strip(title);
		return my;
	}
}
// ********************************************************************

// Walks the directory tree rooted at `pathname` and returns the joined
// path of every entry that is not itself a directory.
string[] listdirrec(string pathname)
{
	string[] found = null;

	// listdir callback; `pathname` always names the directory that is
	// currently being enumerated.
	bool visit(string filename)
	{
		string full = std.path.join(pathname, filename);
		if(!isdir(full))
		{
			found ~= full;
			return true; // continue
		}

		// descend, then restore the directory we came from
		string saved = pathname;
		pathname = full;
		listdir(pathname, &visit);
		pathname = saved;
		return true; // continue
	}

	listdir(pathname, &visit);
	return found;
}

Page[string] pages;
KeyLink[string][string] keywords;   // keywords[normalize(keyword)][original url w/o anchor] = anchor/title
string[string] keyTable;            // keyTable[normalize(keyword)] = keyword spelling to use

// Records that `keyword` is reachable at `link` (a file path with an
// optional "#anchor").
//
// Without a title, an entry that already exists for the same keyword and
// file is replaced only when the new anchor compares lower, and the
// stored spelling of the keyword is replaced only when the new spelling
// compares lower. With a title, both are overwritten unconditionally.
void addKeyword(string keyword, string link, string title = null)
{
	keyword = strip(keyword);
	string key = normalize(keyword);
	string target = removeAnchor(link);
	backSlash(target);
	string frag = getAnchor(link);

	// when title is present, it overrides any existing anchors/etc.
	bool storeLink = true;
	if(title==null && (key in keywords) && (target in keywords[key]))
		storeLink = keywords[key][target].anchor>frag; // "less" is better
	if(storeLink)
		keywords[key][target] = KeyLink(frag, title);

	bool storeName = true;
	if(title==null && (key in keyTable))
		storeName = keyTable[key]>keyword; // "less" is better
	if(storeName)
		keyTable[key] = keyword;
}

// Program entry point. Only its opening statements are on this line; the
// last statement below is left open and continues on the following lines.
void main()
{
	// clean up
	if(exists("chm"))
		foreach(file;listdirrec("chm\\"))
			std.file.remove(file);
	else
		mkdir("chm");

	string[] files = listdirrec("d\\");
	foreach(i,file;files)
		pages[file] = new Page;

	RegExp re_title = new RegExp(`(.*) - (The )?D Programming Language( [0-9]\.[0-9])? - Digital Mars`);
	RegExp re_title2 = new RegExp(`(Digital Mars - The )?D Programming Language( [0-9]\.[0-9])? - (.*)`);
	RegExp re_title3 = new RegExp(`

(.*)

`); RegExp re_heading = new RegExp(`

(.*)

`); RegExp re_heading_link = new RegExp(`

(.*)

`); RegExp re_nav_link = new RegExp(`
  • (.*)
  • `); RegExp re_anchor = new RegExp(`(<.{1,2}>)*([^<]+)<`); RegExp re_anchor_2 = new RegExp(`]*)>(<.{1,2}>)*([^<]+)<`); RegExp re_link = new RegExp(`(<.{1,2}>)*([^<]+)<`); RegExp re_def = new RegExp(`
    (.*)([^<]+)<`); foreach(fileName,page;pages) with(page) { string destdir = movePath(std.path.getDirName(fileName)); if(!exists(destdir)) mkdir(destdir); newFileName = movePath(fileName); if(match(fileName, `\.html$`)) { writefln("Processing "~fileName); src = cast(string)read(fileName); string[] lines = splitlines(src); string[] newlines = null; bool skip = false, intoctop = false, innavblock = false, innavblock2 = false; int dl = 0; string anchor = null; anchors[""] = true; foreach(origline;lines) { string line = origline; bool nextSkip = skip; if(match(line, `
  • D .\.0 (...)|(→)
  • `)) continue; // don't process link as well if (re_title.test(line)) { title = strip(re_title.match(1)); line = re_title.replace(`` ~ title ~ ``); } if (re_title2.test(line)) { title = strip(re_title2.match(3)); line = re_title2.replace(`` ~ title ~ ``); } if (re_title2.test(line)) if(title=="") title = strip(re_title2.match(1)); if (re_anchor.test(line)) { anchor = '#' ~ re_anchor.match(1); anchors[anchor] = true; } else if (re_anchor_2.test(line)) { anchor = '#' ~ re_anchor_2.match(1); anchors[anchor] = true; } if(match(line, `
    `)) intoctop = true; if(match(line, ``)) intoctop = innavblock2 = false; if(std.string.find(line, `
    `)>=0) dl++; if(dl==1) { if(re_def.test(line)) { anchor = re_def.match(2); while("#"~anchor in anchors) anchor ~= '_'; anchors["#"~anchor] = true; line = re_def.pre ~ re_def.replace(`
    $1$2<`) ~ re_def.post; //writefln("new line: ", line); addKeyword(re_def.match(2), fileName ~ "#" ~ anchor); } } if(std.string.find(line, `
    `)>=0) dl--; if(re_heading_link.test(line)) { if(innavblock2) linkBlocks ~= LinkBlock(re_heading_link.match(1), re_heading_link.match(3), re_heading_link.match(4)); } else if(re_heading.test(line)) { if(innavblock2) linkBlocks ~= LinkBlock("", "", re_heading.match(1)); } if(re_nav_link.test(line)) if(intoctop) toctop ~= Link(re_nav_link.match(1), re_nav_link.match(3), re_nav_link.match(4)); else if(innavblock2) if(re_nav_link.match(1)[0..7]!="http://" && exists(absoluteUrl(fileName, re_nav_link.match(1)))) linkBlocks[$-1].links ~= Link(re_nav_link.match(1), re_nav_link.match(3), re_nav_link.match(4)); //else // writefln("Displaced link: ", line); if(re_anchor.test(line)) addKeyword(re_anchor.match(3), fileName ~ "#" ~ re_anchor.match(1)); else if(re_anchor_2.test(line)) addKeyword(re_anchor_2.match(3), fileName ~ "#" ~ re_anchor_2.match(1)); if(re_link.test(line)) if(re_link.match(1)[0..min($,7)]!="http://") addKeyword(re_link.match(3), absoluteUrl(fileName, re_link.match(1))); // skip Google ads if(match(line, `^