I am trying to write a script that will walk through an HTML source and create a JSON file of the DOM that will then be shown in a tree view using d3.js. The problem I have is the desire to show not only the element (TITLE, P, LI, etc.) but also the value of the element. This is easy enough IF I only do it for end node elements (no children). But, I sometimes need it for parent objects as in the UL below for items II and B.
<!-- Sample input: items "II" and "B" each hold their own text AND a nested <ul>. -->
<ul class="level-1">
<li>I</li>
<li>II
<ul class="level-2">
<li>A</li>
<li>B
<ul class="level-3">
<li>1</li>
<li>2</li>
<li>3</li>
</ul>
</li>
<li>C</li>
</ul>
</li>
<li>III</li>
</ul>
In my function below, the following line works for end nodes, but I cannot figure out how to do the same for parents without printing all of the text from every child.
$output.append(', "value": "' + $(child).text() + '"}');
I have tried some first-child stuff from jQuery but could not get it to work. I also want to keep this as generic as possible so that I can feed in any HTML source. In other words, I do not want to write if (nodeName === 'LI') and then do list-item-specific stuff.
/**
 * Walks the element children of `domObject` and appends one JSON object per
 * child to the outer `$output`, separated by commas.
 *
 * Each object has a "name" (the element's nodeName). Elements with element
 * children get a "children" array built recursively, and additionally a
 * "value" holding only the text that sits directly inside the element (not
 * text belonging to its descendants) when that text is non-blank. Elements
 * without element children get a "value" with their full text. TEXTAREA and
 * SCRIPT elements never get a "value".
 *
 * @param {jQuery} domObject - jQuery-wrapped element whose children are serialised.
 * @returns {undefined} Output is written to `$output` as a side effect.
 */
var createJsonOutput = function(domObject) {
  var TEXT_NODE = 3; // numeric value of Node.TEXT_NODE

  // True for elements whose text should not be exported as a "value".
  var skipsValue = function(nodeName) {
    return nodeName === 'TEXTAREA' || nodeName === 'SCRIPT';
  };

  // Concatenates only the text nodes that are direct children of `node`,
  // so text owned by nested elements is left out.
  var ownText = function(node) {
    var kids = node.childNodes || [];
    var pieces = [];
    for (var i = 0; i < kids.length; i++) {
      if (kids[i].nodeType === TEXT_NODE) {
        pieces.push(kids[i].nodeValue);
      }
    }
    return pieces.join('');
  };

  var $currentChildren = domObject.children();
  var childrenCnt = $currentChildren.length;

  $.each($currentChildren, function(idx, child) {
    var $child = $(child);
    var excluded = skipsValue(child.nodeName);

    $output.append('{"name": "' + child.nodeName + '"');

    if ($child.children().length > 0) {
      // Parent element: trim away the line breaks/indentation that surround
      // the nested markup and emit a value only if real text remains.
      var directText = excluded ? '' : ownText(child).trim();
      if (directText !== '') {
        // JSON.stringify adds the quotes and escapes ", \ and control chars.
        $output.append(', "value": ' + JSON.stringify(directText));
      }
      $output.append(',"children": [');
      createJsonOutput($child);
      $output.append(']}');
    } else if (!excluded) {
      $output.append(', "value": ' + JSON.stringify($child.text()) + '}');
    } else {
      $output.append('}');
    }

    // Separate siblings, but leave no trailing comma after the last one.
    if ((idx + 1) < childrenCnt) {
      $output.append(',');
    }
  });
};
// Start the walk at the document's <html> element; its descendants are serialised into $output.
createJsonOutput($('html'));
EXAMPLE (unformatted) JSON:
{"name": "HTML","children": [{"name": "HEAD","children": [{"name": "META", "value": ""},{"name": "TITLE", "value": "Node-Link Tree"},{"name": "SCRIPT"},{"name": "SCRIPT"},{"name": "LINK", "value": ""}]},{"name": "BODY","children": [{"name": "DIV","children": [{"name": "UL","children": [{"name": "LI", "value": "I"},{"name": "LI","children": [{"name": "UL","children": [{"name": "LI", "value": "A"},{"name": "LI","children": [{"name": "UL","children": [{"name": "LI", "value": "1"},{"name": "LI", "value": "2"},{"name": "LI", "value": "3"}]}]},{"name": "LI", "value": "C"}]}]},{"name": "LI", "value": "III"}]}]},{"name": "DIV","children": [{"name": "TEXTAREA"},{"name": "P", "value": "tree time!"}]},{"name": "DIV", "value": ""},{"name": "SCRIPT"}]}]}
You can write a function that returns just the text of the current element:
http://viralpatel.net/blogs/2011/02/jquery-get-text-element-without-child-element.html