Skip to content
Open

dom #49

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--format doc
--color
--require spec_helper
76 changes: 76 additions & 0 deletions lib/dom_tree.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Builds a tree of Node structs from an HTML string and can write an indented
# rendering of that tree back out.
#
# Relies on a Node struct (type, parent, depth, children, attributes) that
# responds to build_attributes_hash being loaded by the caller.
class DomTree
  # NOTE: @string is never assigned inside this class, so #string returns nil;
  # the reader is kept only so existing callers do not break.
  attr_reader :string, :document

  # Creates an empty tree whose root is a synthetic "document head" node at
  # depth 0 with no children.
  def initialize
    @document = Node.new("document head", nil, 0, [])
  end

  # Splits +str+ into an array of tags and text runs, in source order.
  #
  # Each element is either a complete "<...>" tag or the text between two
  # tags, stripped of surrounding whitespace. Whitespace-only runs are dropped.
  # Text is captured as "everything up to the next <" so that symbols such as
  # "+" or "=" stay inside their text run, and tags may span several lines.
  #
  # @param str [String] raw HTML
  # @return [Array<String>]
  def parse_string(str)
    str.scan(/<[^>]*>|[^<]+/).map(&:strip).reject(&:empty?)
  end

  # Parses +str+ and attaches the resulting nodes beneath the document root.
  #
  # @param str [String] raw HTML
  # @return [Array<String>] the parsed tokens (same value as #parse_string)
  def build_tree(str)
    top = @document
    parse_string(str).each { |item| top = add_node(build_node(item), top) }
  end

  # Attaches +node+ relative to the current insertion point +top+.
  #
  # @return [Node] the insertion point to use for the next token
  def add_node(node, top)
    if opening_tag?(node.type)
      add_opening_tag(node, top)
    else
      add_other_tag(node, top)
    end
  end

  # Appends an element node to +top+ and descends into it.
  #
  # @return [Node] +node+, which becomes the new insertion point
  def add_opening_tag(node, top)
    top.children << node
    node.children = []
    node.parent = top
    node.depth = top.depth + 1
    node
  end

  # Handles closing tags and leaf tokens (text, doctype, comments).
  #
  # A closing tag moves the insertion point up one level and adds nothing to
  # the tree; anything else is appended to +top+ as a leaf.
  #
  # @return [Node] the insertion point to use for the next token
  def add_other_tag(node, top)
    # A stray closing tag at the root has no parent to climb to; staying on
    # the root keeps the insertion point non-nil for the tokens that follow.
    return top.parent || top if closing_tag?(node.type)

    top.children << node
    node.parent = top
    node.depth = top.depth + 1
    top
  end

  # True for tags that open an element.
  #
  # Declarations, comments and processing instructions ("<!...>", "<?...>")
  # have no closing counterpart, so they are not opening tags; treating them
  # as such nested the whole document under the doctype and made #render emit
  # an empty "</>" closing tag.
  def opening_tag?(item)
    item.start_with?("<") && !item.start_with?("</", "<!", "<?")
  end

  # True for "</...>" tokens.
  def closing_tag?(item)
    item.start_with?("</")
  end

  # Wraps a token in a Node, parsing attributes for opening tags only.
  #
  # @param type [String] a token produced by #parse_string
  # @return [Node]
  def build_node(type)
    node = Node.new(type)
    node.build_attributes_hash if opening_tag?(node.type)
    node
  end

  # Writes the rendered tree to +path+ (defaults to "output.html").
  #
  # The block form of File.open closes the handle even if rendering raises.
  #
  # @param path [String] destination file, truncated if it exists
  # @return [nil]
  def print_to_file(path = "output.html")
    File.open(path, "w") { |file| render(@document, file) }
    nil
  end

  # Recursively appends +top+ and its subtree to +file+, one node per line,
  # indented by one space per depth level. Element nodes are followed by a
  # matching closing tag after their children.
  #
  # @param top [Node] subtree root
  # @param file [#<<] any object accepting strings via <<
  def render(top, file)
    indent = " " * top.depth
    file << "#{indent}#{top.type}\n"
    top.children.each { |element| render(element, file) } if top.children
    file << "#{indent}#{make_closing(top.type)}>\n" if opening_tag?(top.type)
  end

  # Returns the closing tag for +tag+ without its final ">", e.g. "</div" for
  # '<div class="x">'. The caller appends the ">".
  #
  # The name is everything after "<" up to the first whitespace, "/" or ">",
  # so hyphenated names are kept whole.
  def make_closing(tag)
    name = tag[/\A<([^\s\/>]+)/, 1]
    "</#{name}"
  end
end
15 changes: 15 additions & 0 deletions lib/node.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# A single node of the DOM tree.
#
# type       - the raw token: a tag such as '<div class="x">' or a text run
# parent     - the enclosing node (nil when unset)
# depth      - nesting level used for indentation
# children   - array of child nodes (nil when unset)
# attributes - hash produced by #build_attributes_hash (nil until called)
Node = Struct.new(:type, :parent, :depth, :children, :attributes) do
  # Parses the attributes out of the tag held in +type+ and stores them in
  # +attributes+.
  #
  # Attributes with a value map to an array of the whitespace-separated words
  # in that value (so class="a b" => ["a", "b"] and id="x" => ["x"]);
  # attributes without a value (e.g. "disabled") map to true.
  #
  # Values may be double-quoted, single-quoted or bare, and may contain any
  # characters (digits, dots, underscores, slashes ...), not only letters
  # and dashes.
  #
  # @return [Hash{String => Array<String>, true}] the hash that was stored
  def build_attributes_hash
    # Remove the leading "<name" and trailing ">" / "/>" so the tag name is
    # never mistaken for a boolean attribute.
    attribute_text = type.to_s.sub(/\A<\s*[^\s\/>]+/, "").sub(/\/?\s*>\s*\z/, "")

    pairs = attribute_text.scan(/([^\s=\/"'<>]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+)))?/)

    self.attributes = pairs.each_with_object({}) do |(name, double, single, bare), att_hash|
      value = double || single || bare
      att_hash[name] = value ? value.split(" ") : true
    end
  end
end
71 changes: 71 additions & 0 deletions lib/node_renderer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

# Prints summary statistics about a node and its subtree to standard output.
#
# Nodes are expected to respond to #type (String), #children (Array or nil)
# and #attributes.
class NodeRenderer
  # @param tree [Node] the node used when #render is called without one
  def initialize(tree)
    @tree = tree
  end

  # Prints the descendant count, the per-type counts and the attributes of
  # +node+. Falls back to the tree given to the constructor when +node+ is
  # nil or omitted.
  def render(node = nil)
    node = @tree if node.nil?
    nodes_below(node)
    node_type_count(node)
    node_attributes(node)
  end

  # Prints how many nodes sit anywhere beneath +node+.
  #
  # @return [String] the message that was printed
  def nodes_below(node)
    stack = [node]
    count = 0
    while (item = stack.pop)
      children = add_children_to_stack(item)
      count += children.length
      stack.concat(children)
    end
    # puts (not p) so the line is printed without inspect-style quotes, the
    # same way print_hash prints its lines.
    message = "There are #{count} children in this nodes subtree"
    puts message
    message
  end

  # Prints one line per node type found beneath +node+; text nodes are
  # grouped under "text". +node+ itself is not counted.
  def node_type_count(node)
    stack = [node]
    type_hash = Hash.new(0)
    while (item = stack.pop)
      type_hash = update_hash(get_type(item.type), type_hash, item, node)
      stack.concat(add_children_to_stack(item))
    end
    print_hash(type_hash)
  end

  # Returns a new array holding the children of +item+ (empty when it has
  # none), ready to be appended to a traversal stack.
  def add_children_to_stack(item)
    Array(item.children).dup
  end

  # Prints one "There are N type(s)" line per entry of +hash+.
  def print_hash(hash)
    hash.each do |key, val|
      puts "There are #{val} #{key}(s) in this nodes subtree"
    end
  end

  # Increments the counter for +match+ (or "text" when +match+ is nil) unless
  # the node must be skipped; see #blank_or_star_node?.
  #
  # @return [Hash] +hash+, updated in place
  def update_hash(match, hash, item, node)
    return hash unless blank_or_star_node?(match, item, node)

    hash[match || "text"] += 1
    hash
  end

  # True when +item+ should be counted: its type name is not blank and it is
  # not the start node itself.
  #
  # Identity (equal?) is used instead of ==, because Struct#== compares every
  # member and would walk the parent/children graph on each call.
  def blank_or_star_node?(match, item, node)
    match != "" && !item.equal?(node)
  end

  # Extracts the lower-cased element name from a tag.
  #
  # @param tag [String] a node type
  # @return [String, nil] the element name; "" for tags without one (closing
  #   tags, "<!doctype ...>", comments); nil when +tag+ is not a tag at all
  def get_type(tag)
    return unless tag.start_with?("<")

    tag[/\A<([a-zA-Z][\w-]*)/, 1].to_s.downcase
  end

  # Prints the attributes hash of +node+ in inspect form.
  def node_attributes(node)
    p node.attributes
  end
end
56 changes: 56 additions & 0 deletions lib/tree_searcher.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
require_relative "node"
require_relative "dom_tree"
require_relative "node_renderer"

# Finds nodes in a tree by the value of an attribute written in their tag.
#
# Nodes are expected to respond to #type (String) and #children (Array or
# nil).
class TreeSearcher
  # @param tree [Node] root of the subtree to search
  def initialize(tree)
    @tree = tree
  end

  # Returns every node whose +attribute+ contains +text+.
  #
  # Nodes that do not carry the attribute at all match when their whole type
  # equals +text+ (which is how text nodes can be found). Nodes are visited
  # with an explicit stack, last child first, so results are not in document
  # order.
  #
  # @param attribute [Symbol, String] attribute name, e.g. :class
  # @param text [String] value to look for
  # @return [Array<Node>]
  def search_by(attribute, text)
    stack = [@tree]
    matching_nodes = []
    while (node = stack.pop)
      matching_nodes.concat(match_attributes(node, attribute, text))
      stack.concat(add_children_to_stack(node))
    end
    matching_nodes
  end

  # Returns a new array holding the children of +node+ (empty when it has
  # none).
  def add_children_to_stack(node)
    Array(node.children).dup
  end

  # Returns [node] when +node+ matches, [] otherwise.
  #
  # A node is reported at most once, even when the searched value occurs
  # several times in the attribute (e.g. class="a a").
  def match_attributes(node, attribute, text)
    values = get_attribute(node.type, attribute.to_s)
    matched = values ? values.include?(text) : text == node.type
    matched ? [node] : []
  end

  # Extracts the double-quoted value of +attribute+ from the tag +str+.
  #
  # The name is escaped before being placed in the pattern, so regex
  # metacharacters in it are literal. It must also not be preceded by a word
  # character or "-", so "class" does not match inside "data-class".
  #
  # @param str [String] a tag such as '<div class="a b">'
  # @param attribute [String] attribute name
  # @return [Array<String>, nil] the value split on whitespace for "class", a
  #   one-element array holding the raw value for any other attribute, or nil
  #   when the attribute is absent
  def get_attribute(str, attribute)
    pattern = /(?<![\w-])#{Regexp.escape(attribute)}\s*=\s*"(.*?)"/
    match = str.match(pattern)
    return unless match

    attribute == "class" ? match.captures[0].split : match.captures
  end
end

# Demo: parse test.html, print statistics for the first node whose class
# contains "top-div" (or for the whole document when there is none), then
# write the rendered tree to output.html.
#
# Guarded so that requiring this file for TreeSearcher does not trigger file
# I/O; the demo only runs when the file is executed directly.
if __FILE__ == $PROGRAM_NAME
  dom = DomTree.new
  # binread reads in binary mode like the previous "rb" open, and closes the
  # handle even when reading fails.
  dom.build_tree(File.binread("test.html"))
  searcher = TreeSearcher.new(dom.document)
  node = searcher.search_by(:class, "top-div")[0]
  NodeRenderer.new(dom.document).render(node)
  dom.print_to_file
end
75 changes: 75 additions & 0 deletions output.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
document head
<!doctype html>
<html>
<head>
<title>
This is a test page
</title>
</head>
<body>
<div disabled class="top-div bye" id="hello">
I'm an outer div!!!
<div class="inner-div">
I'm an inner div!!! I might just
<em>
emphasize
</em>
some text.
</div>
I am EVEN MORE TEXT for the SAME div!!!
</div>
<main id="main-area">
<header class="super-header">
<h1 class="emphasized">
Welcome to the test doc!
</h1>
<h2>
This document contains data
</h2>
</header>
<ul>
Here is the data:
<li>
Four list items
</li>
<li class="bold funky important">
One unordered list
</li>
<li class ="bold">
One h1
</li>
<li>
One h2
</li>
<li>
One header
</li>
<li>
One main
</li>
<li>
One body
</li>
<li>
One html
</li>
<li>
One title
</li>
<li>
One head
</li>
<li>
One doctype
</li>
<li>
Two divs
</li>
<li>
And infinite fun!
</li>
</ul>
</main>
</body>
</html>
</>
60 changes: 60 additions & 0 deletions spec/dom_tree_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

require 'node'
require 'dom_tree'

describe DomTree do
  let(:dom_tree) { DomTree.new }
  let(:dom_tree_error) { DomTree.new("arguments") }

  # Nested markup with text before, between and after the child elements.
  let(:test_html) do
    [
      "<div>",
      "div text before",
      "<p>",
      "p text",
      "</p>",
      "<div>",
      "more div text",
      "</div>",
      "div text after",
      "</div>"
    ].join("\n")
  end

  describe '#initialize' do
    it 'returns an instance of DomTree' do
      expect(dom_tree).to be_instance_of(DomTree)
    end

    it 'will return an error if initalized with an argument' do
      expect { dom_tree_error }.to raise_error(ArgumentError)
    end

    it 'creates a document node with the type set to document head' do
      expect(dom_tree.document.type).to eq("document head")
    end
  end

  describe '#parse_string' do
    it 'takes a string as an argument' do
      expect { dom_tree.parse_string("test") }.not_to raise_error
    end

    it 'removes html formatting and retuns an array of strings for each element' do
      # One entry per tag or text run, in source order.
      expected_tokens = [
        "<div>", "div text before",
        "<p>", "p text", "</p>",
        "<div>", "more div text", "</div>",
        "div text after", "</div>"
      ]

      expect(dom_tree.parse_string(test_html)).to eq(expected_tokens)
    end
  end

  describe '#build_tree' do
    it 'takes a string as an argument' do
      expect { dom_tree.build_tree("test") }.not_to raise_error
    end

    it 'after building a tree the head node has correct number of children' do
      dom_tree.build_tree(test_html)

      # The outer <div> is the only direct child of the document node.
      expect(dom_tree.document.children.length).to eq(1)
    end
  end
end
19 changes: 19 additions & 0 deletions spec/node_renderer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

require 'node_renderer'

describe NodeRenderer do
  # The constructor only stores its argument, so any object will do here.
  let(:node_render) { NodeRenderer.new("tree") }

  describe '#intialize' do
    it 'returns an instance of NodeRenderer' do
      expect(node_render).to be_instance_of(NodeRenderer)
    end

    it 'takes one argument' do
      expect { node_render }.not_to raise_error
    end
  end
end
Loading