From 7edb1e490c02ea05b4cb1002cfcf411e636120c4 Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Fri, 15 Jul 2016 13:33:55 -0500 Subject: [PATCH 01/22] added domtree class and hashes --- dom_tree.rb | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 dom_tree.rb diff --git a/dom_tree.rb b/dom_tree.rb new file mode 100644 index 0000000..e4360d2 --- /dev/null +++ b/dom_tree.rb @@ -0,0 +1,61 @@ + +class DomTree + + +# p = "

" + +# p.match(/[a-z]/) + +# "

" + +# "" + + def parse_tag(tag) + parsed = { "type" => tag.match(/<([a-z]*\d*)\W/).captures, + "classes" => tag.match(/class='(.*?)'/).captures.split, + "id" => "", + "name" => "" } + + + + parsed["classes"] = + + + #looks for the thing after '<' for type + #looks for 'class' + #looks for 'id' + #looks for 'name' + #sets all in a hash with keys and values ------- + end + + def classes + end + + def type + + end + + def id + + end + + def name + + end + + + +# tag = parse_tag("

") +# tag.type +# #=> "p" +# tag.classes +# #=> ["foo", "bar"] +# tag.id +# #=> "baz" +# tag.name +# #=> "fozzie" + + +end + +sfsdfd \ No newline at end of file From 938e3298ce09475f0d37318d420111bafacf00c6 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Fri, 15 Jul 2016 14:57:06 -0400 Subject: [PATCH 02/22] completed dom tree tests --- dom_tree.rb | 77 ++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 42 deletions(-) diff --git a/dom_tree.rb b/dom_tree.rb index e4360d2..c4c5f84 100644 --- a/dom_tree.rb +++ b/dom_tree.rb @@ -1,61 +1,54 @@ class DomTree + attr_reader :tag - -# p = "

" - -# p.match(/[a-z]/) - -# "

" - -# "" - - def parse_tag(tag) - parsed = { "type" => tag.match(/<([a-z]*\d*)\W/).captures, - "classes" => tag.match(/class='(.*?)'/).captures.split, - "id" => "", - "name" => "" } - - - - parsed["classes"] = - - - #looks for the thing after '<' for type - #looks for 'class' - #looks for 'id' - #looks for 'name' - #sets all in a hash with keys and values ------- + def initialize(tag) + @tag = tag end - def classes + def type + @tag.match(/<([a-z]*\d*)\W/).captures[0] if tag.match(/<([a-z]*\d*)\W/) end - def type - + def classes + @tag.match(/class\s?=\s?'(.*?)'/).captures[0].split if tag.match(/class\s?=\s?'(.*?)'/) end def id - + @tag.match(/id\s?=\s?'(.*?)'/).captures[0] if tag.match(/id\s?=\s?'(.*?)'/) end def name - + @tag.match(/name\s?=\s?'(.*?)'/).captures[0] if tag.match(/name\s?=\s?'(.*?)'/) end + def title + @tag.match(/title\s?=\s?'(.*?)'/).captures[0] if tag.match(/title\s?=\s?'(.*?)'/) + end - -# tag = parse_tag("

") -# tag.type -# #=> "p" -# tag.classes -# #=> ["foo", "bar"] -# tag.id -# #=> "baz" -# tag.name -# #=> "fozzie" - + def src + @tag.match(/src\s?=\s?'(.*?)'/).captures[0] if tag.match(/src\s?=\s?'(.*?)'/) + end end -sfsdfd \ No newline at end of file +par = DomTree.new("

") +d = DomTree.new("

") +i = DomTree.new("") + +p par.type +p par.classes +p par.id +p par.name + +p d.type +p d.classes +p d.id +p d.name + +p i.type +p i.classes +p i.id +p i.name +p i.src +p i.title From 8cd59932c208df6b30b7e81c0a5cdf2760a53478 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Fri, 15 Jul 2016 17:05:17 -0400 Subject: [PATCH 03/22] completed warmup 1 --- dom_tree.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dom_tree.rb b/dom_tree.rb index c4c5f84..146272f 100644 --- a/dom_tree.rb +++ b/dom_tree.rb @@ -7,7 +7,9 @@ def initialize(tag) end def type - @tag.match(/<([a-z]*\d*)\W/).captures[0] if tag.match(/<([a-z]*\d*)\W/) + if match = tag.match(/<([a-z]*\d*)\W/) + match.captures[0] + end end def classes From d7dbc6a08bc3645c87c2ddc8ddb3258c1883e9fe Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Fri, 15 Jul 2016 16:26:02 -0500 Subject: [PATCH 04/22] built struct and class --- html_parse.rb | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 html_parse.rb diff --git a/html_parse.rb b/html_parse.rb new file mode 100644 index 0000000..d3acf7a --- /dev/null +++ b/html_parse.rb @@ -0,0 +1,72 @@ +#we'll have nodes +#nodes like type +#no attributes in example +#we're going to need a tree +#we'll feed it a string and it will break ever differnt type +# to it's own node and add to the tree. +#outputter method that prints the tree. + + +Node = Struct.new(:type, :children) + + +class DomTree + + + + attr_reader :string + + def initialize(str) + @string = str + end + + def build_node(type) + Node.new(type) + end + + def parse_string(str) + b = str.split("\n").map {|item| item.strip} + b + + end + +end + + + + +#
+# div text before +#

+# p text +#

+#
+# more div text +#
+# div text after +#
+ + + + +# # The HTML string version +# # You could read it in from a file instead if so inclined, +# # which would give you newline characters too +# html_string = "
div text before

p text

more div text
div text after
" + +# # Now pull that string into a simple data structure +# data_structure = parser_script( html_string ) + +# # Finally, output the string again. +# # It doesn't have to have pretty spacing like it does here... +# outputter( data_structure ) +# #
+# # div text before +# #

+# # p text +# #

+# #
+# # more div text +# #
+# # div text after +# #
\ No newline at end of file From bf4a2c70a1a004dc6fdd952c80fb39464632e44b Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Fri, 15 Jul 2016 17:51:02 -0400 Subject: [PATCH 05/22] working on regular expression --- html_parse.rb | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index d3acf7a..cbc4afb 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -2,22 +2,18 @@ #nodes like type #no attributes in example #we're going to need a tree -#we'll feed it a string and it will break ever differnt type +#we'll feed it a string and it will break ever differnt type # to it's own node and add to the tree. #outputter method that prints the tree. Node = Struct.new(:type, :children) - class DomTree - - - attr_reader :string def initialize(str) - @string = str + @string = str end def build_node(type) @@ -25,17 +21,32 @@ def build_node(type) end def parse_string(str) - b = str.split("\n").map {|item| item.strip} - b - + b = str.split("\n").map { |item| item.strip } + b.map do |item| + if match = item.match(/<(\w*\d*)>/) + match.captures[0] + elsif text = item.match(/\A(.*)\z/) + text.captures[0] + end + end.compact end end +dom = DomTree.new("
+ div text before +

+ p text +

+
+ more div text +
+ div text after +
") +p dom.parse_string(dom.string) - -#
+# "
# div text before #

# p text @@ -44,7 +55,7 @@ def parse_string(str) # more div text #

# div text after -#
+#
" @@ -69,4 +80,4 @@ def parse_string(str) # # more div text # #
# # div text after -# #
\ No newline at end of file +# # From a1fdee51c8bd8932185a7788994a48c602897cbf Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Fri, 15 Jul 2016 17:32:37 -0500 Subject: [PATCH 06/22] added then removed parser --- html_parse.rb | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index cbc4afb..e26eb1b 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -21,18 +21,27 @@ def build_node(type) end def parse_string(str) - b = str.split("\n").map { |item| item.strip } - b.map do |item| - if match = item.match(/<(\w*\d*)>/) - match.captures[0] - elsif text = item.match(/\A(.*)\z/) - text.captures[0] - end - end.compact + b = str.split(/<\w*\d*/).map {|item| item.strip} + + + # b = str.split("\n").map { |item| item.strip } + #.reject { |item| item.match(/<\/(\w*\d*)>/)} + # b.map do |item| + # if match = item.match(/<(\w*\d*)>/) + # match.captures[0] + # elsif text = item.match(/\A(.*)\z/) + # text.captures[0] + # end + # end.compact end + + + end + + dom = DomTree.new("
div text before

From dd91d85f39a91b89015f3a2a5e26eb1aaf6b9b96 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Fri, 15 Jul 2016 19:02:57 -0400 Subject: [PATCH 07/22] fixed regex --- html_parse.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index e26eb1b..1785d89 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -21,11 +21,9 @@ def build_node(type) end def parse_string(str) - b = str.split(/<\w*\d*/).map {|item| item.strip} - - + str.scan(/<.*>|.*/).map(&:strip).reject(&:empty?) # b = str.split("\n").map { |item| item.strip } - #.reject { |item| item.match(/<\/(\w*\d*)>/)} + #.reject { |item| item.match(/<\/(\w*\d*)>/)} # b.map do |item| # if match = item.match(/<(\w*\d*)>/) # match.captures[0] @@ -35,6 +33,10 @@ def parse_string(str) # end.compact end + def build_tree + + end + From 6422e7d5fec6383c28ab54291b1de8d52764912c Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Fri, 15 Jul 2016 18:28:25 -0500 Subject: [PATCH 08/22] built tree builder --- html_parse.rb | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index 1785d89..448c04a 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -7,13 +7,15 @@ #outputter method that prints the tree. -Node = Struct.new(:type, :children) +Node = Struct.new(:type, :children :parent) class DomTree attr_reader :string def initialize(str) @string = str + @document = build_node("document head") + @parsed_string = [] end def build_node(type) @@ -21,20 +23,27 @@ def build_node(type) end def parse_string(str) - str.scan(/<.*>|.*/).map(&:strip).reject(&:empty?) - # b = str.split("\n").map { |item| item.strip } - #.reject { |item| item.match(/<\/(\w*\d*)>/)} - # b.map do |item| - # if match = item.match(/<(\w*\d*)>/) - # match.captures[0] - # elsif text = item.match(/\A(.*)\z/) - # text.captures[0] - # end - # end.compact + @parsed_string = str.scan(/<.*>|.*/).map(&:strip).reject(&:empty?) end def build_tree + #loop through each item in parse string; + #for each item we want to build a node and set parent's node pointers; + #for each item in the array, loop through each item in array + #and check if its an html item[0] == < && item[1] != / + top = @document + @parsed_string.each do |item| + if item[0] == "<" && item[1] != "/" + top.children << top = Node.new(item, [], top) + elsif item[0] == "<" && item[1] == "/" + top = top.parent + else + top.children << Node.new(item, nil, top) + end + + + end From b33e77e6029596512def32515e1ee6e62d991985 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Fri, 15 Jul 2016 19:45:38 -0400 Subject: [PATCH 09/22] basic render method --- html_parse.rb | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index 448c04a..0b76847 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -7,47 +7,42 @@ #outputter method that prints the tree. -Node = Struct.new(:type, :children :parent) +Node = Struct.new(:type, :children, :parent, :depth) class DomTree - attr_reader :string + attr_reader :string, :document def initialize(str) @string = str - @document = build_node("document head") + @document = Node.new("document head", [], nil, 0) @parsed_string = [] end - def build_node(type) - Node.new(type) - end - def parse_string(str) @parsed_string = str.scan(/<.*>|.*/).map(&:strip).reject(&:empty?) end def build_tree - #loop through each item in parse string; - #for each item we want to build a node and set parent's node pointers; - #for each item in the array, loop through each item in array - #and check if its an html item[0] == < && item[1] != / top = @document @parsed_string.each do |item| if item[0] == "<" && item[1] != "/" - top.children << top = Node.new(item, [], top) + top.children << top = Node.new(item, [], top, top.depth + 1) elsif item[0] == "<" && item[1] == "/" + top.children << Node.new(item, nil, top, top.depth + 1) top = top.parent else - top.children << Node.new(item, nil, top) + top.children << Node.new(item, nil, top, top.depth + 1) + end end - - - - end - - + def render + stack = [@document] + while item = stack.pop + item.children.each { |type| stack << type } if item.children + p item.type + end + end end @@ -64,7 +59,9 @@ def build_tree div text after

") -p dom.parse_string(dom.string) +dom.parse_string(dom.string) +dom.build_tree +dom.render # "
# div text before From d85381835ce4132fcc5f4b376f7cdf586a693c41 Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Fri, 15 Jul 2016 19:01:28 -0500 Subject: [PATCH 10/22] finished render --- html_parse.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index 0b76847..eed5d5a 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -7,7 +7,7 @@ #outputter method that prints the tree. -Node = Struct.new(:type, :children, :parent, :depth) +Node = Struct.new(:type, :children, :parent, :depth ) class DomTree attr_reader :string, :document @@ -38,8 +38,8 @@ def build_tree def render stack = [@document] - while item = stack.pop - item.children.each { |type| stack << type } if item.children + while item = stack.shift + item.children.reverse_each { |type| stack.unshift(type) } if item.children p item.type end end From fe94cb7e743a63d70d45d4b4e2defe68a2f16a0e Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Fri, 15 Jul 2016 20:32:00 -0400 Subject: [PATCH 11/22] updated render --- html_parse.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/html_parse.rb b/html_parse.rb index eed5d5a..bda9e33 100644 --- a/html_parse.rb +++ b/html_parse.rb @@ -28,7 +28,7 @@ def build_tree if item[0] == "<" && item[1] != "/" top.children << top = Node.new(item, [], top, top.depth + 1) elsif item[0] == "<" && item[1] == "/" - top.children << Node.new(item, nil, top, top.depth + 1) + top.children << Node.new(item, nil, top, top.depth) top = top.parent else top.children << Node.new(item, nil, top, top.depth + 1) @@ -37,10 +37,10 @@ def build_tree end def render - stack = [@document] - while item = stack.shift - item.children.reverse_each { |type| stack.unshift(type) } if item.children - p item.type + queue = [@document] + while item = queue.shift + item.children.reverse_each { |type| queue.unshift(type) } if item.children + p "#{" " * item.depth}#{item.type}" end end @@ -48,7 +48,7 @@ def render -dom = DomTree.new("
+dom = DomTree.new('
div text before

p text @@ -57,7 +57,7 @@ def render more div text

div text after -
") +
') dom.parse_string(dom.string) dom.build_tree From a4821d06de10ca675ff4e3a75b977e40a678219a Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Sat, 16 Jul 2016 20:31:11 -0400 Subject: [PATCH 12/22] completed searcher and renderer --- dom_tree.rb | 115 ++++++++++++++++++++++++++++++----------------- html_parse.rb | 100 ----------------------------------------- node_renderer.rb | 75 +++++++++++++++++++++++++++++++ tree_searcher.rb | 93 ++++++++++++++++++++++++++++++++++++++ warmups.rb | 56 +++++++++++++++++++++++ 5 files changed, 298 insertions(+), 141 deletions(-) delete mode 100644 html_parse.rb create mode 100644 node_renderer.rb create mode 100644 tree_searcher.rb create mode 100644 warmups.rb diff --git a/dom_tree.rb b/dom_tree.rb index 146272f..3b1aaa2 100644 --- a/dom_tree.rb +++ b/dom_tree.rb @@ -1,56 +1,89 @@ +#we'll have nodes +#nodes like type +#no attributes in example +#we're going to need a tree +#we'll feed it a string and it will break ever differnt type +# to it's own node and add to the tree. +#outputter method that prints the tree. + + +Node = Struct.new(:type, :children, :parent, :depth ) class DomTree - attr_reader :tag + attr_reader :string, :document - def initialize(tag) - @tag = tag + def initialize + @document = Node.new("document head", [], nil, 0) end - def type - if match = tag.match(/<([a-z]*\d*)\W/) - match.captures[0] - end + def parse_string(str) + str.scan(/<.*?>|[\w\p{P}\s]*/).map(&:strip).reject(&:empty?) end - def classes - @tag.match(/class\s?=\s?'(.*?)'/).captures[0].split if tag.match(/class\s?=\s?'(.*?)'/) + def build_tree(str) + parsed_string = parse_string(str) + top = @document + parsed_string.each do |item| + if item[0] == "<" && item[1] != "/" + top.children << top = Node.new(item, [], top, top.depth + 1) + elsif item[0] == "<" && item[1] == "/" + top.children << Node.new(item, nil, top, top.depth) + top = top.parent + else + top.children << Node.new(item, nil, top, top.depth + 1) + end + end end - def id - @tag.match(/id\s?=\s?'(.*?)'/).captures[0] if tag.match(/id\s?=\s?'(.*?)'/) + def render + queue = [@document] + while item = queue.shift + item.children.reverse_each { |type| queue.unshift(type) } if item.children + p "#{" " * item.depth}#{item.type}" + end end - def name - @tag.match(/name\s?=\s?'(.*?)'/).captures[0] if tag.match(/name\s?=\s?'(.*?)'/) - end +end - def title - @tag.match(/title\s?=\s?'(.*?)'/).captures[0] if tag.match(/title\s?=\s?'(.*?)'/) - end +# dom = DomTree.new +# file = File.open("test.html", "rb") +# contents = file.read +# file.close +# dom.build_tree(contents) +# dom.render - def src - @tag.match(/src\s?=\s?'(.*?)'/).captures[0] if tag.match(/src\s?=\s?'(.*?)'/) - end +# "
+# div text before +#

+# p text +#

+#
+# more div text +#
+# div text after +#
" -end -par = DomTree.new("

") -d = DomTree.new("

") -i = DomTree.new("") - -p par.type -p par.classes -p par.id -p par.name - -p d.type -p d.classes -p d.id -p d.name - -p i.type -p i.classes -p i.id -p i.name -p i.src -p i.title + + +# # The HTML string version +# # You could read it in from a file instead if so inclined, +# # which would give you newline characters too +# html_string = "
div text before

p text

more div text
div text after
" + +# # Now pull that string into a simple data structure +# data_structure = parser_script( html_string ) + +# # Finally, output the string again. +# # It doesn't have to have pretty spacing like it does here... +# outputter( data_structure ) +# #
+# # div text before +# #

+# # p text +# #

+# #
+# # more div text +# #
+# # div text after +# #
diff --git a/html_parse.rb b/html_parse.rb deleted file mode 100644 index bda9e33..0000000 --- a/html_parse.rb +++ /dev/null @@ -1,100 +0,0 @@ -#we'll have nodes -#nodes like type -#no attributes in example -#we're going to need a tree -#we'll feed it a string and it will break ever differnt type -# to it's own node and add to the tree. -#outputter method that prints the tree. - - -Node = Struct.new(:type, :children, :parent, :depth ) - -class DomTree - attr_reader :string, :document - - def initialize(str) - @string = str - @document = Node.new("document head", [], nil, 0) - @parsed_string = [] - end - - def parse_string(str) - @parsed_string = str.scan(/<.*>|.*/).map(&:strip).reject(&:empty?) - end - - def build_tree - top = @document - @parsed_string.each do |item| - if item[0] == "<" && item[1] != "/" - top.children << top = Node.new(item, [], top, top.depth + 1) - elsif item[0] == "<" && item[1] == "/" - top.children << Node.new(item, nil, top, top.depth) - top = top.parent - else - top.children << Node.new(item, nil, top, top.depth + 1) - end - end - end - - def render - queue = [@document] - while item = queue.shift - item.children.reverse_each { |type| queue.unshift(type) } if item.children - p "#{" " * item.depth}#{item.type}" - end - end - -end - - - -dom = DomTree.new('
- div text before -

- p text -

-
- more div text -
- div text after -
') - -dom.parse_string(dom.string) -dom.build_tree -dom.render - -# "
-# div text before -#

-# p text -#

-#
-# more div text -#
-# div text after -#
" - - - - -# # The HTML string version -# # You could read it in from a file instead if so inclined, -# # which would give you newline characters too -# html_string = "
div text before

p text

more div text
div text after
" - -# # Now pull that string into a simple data structure -# data_structure = parser_script( html_string ) - -# # Finally, output the string again. -# # It doesn't have to have pretty spacing like it does here... -# outputter( data_structure ) -# #
-# # div text before -# #

-# # p text -# #

-# #
-# # more div text -# #
-# # div text after -# #
diff --git a/node_renderer.rb b/node_renderer.rb new file mode 100644 index 0000000..6ba0b0f --- /dev/null +++ b/node_renderer.rb @@ -0,0 +1,75 @@ +class NodeRenderer + + def initialize(tree) + @tree = tree + end + + def render(node = @tree) + nodes_below(node) + node_type_count(node) + node_attributes(node) + end + + def nodes_below(node) + queue = [node] + count = 0 + while item = queue.pop + if children = item.children + children.each do |child| + queue << child + count += 1 + end + end + end + p "There are #{count} children in this nodes subtree" + end + + def node_type_count(node) + queue = [node] + type_hash = Hash.new(0) + while item = queue.pop + match = get_type(item.type) + if match != "" && item != node + if match == nil + type_hash["text"] += 1 + else + type_hash[match] += 1 + end + end + if children = item.children + children.each do |child| + queue << child + end + end + end + type_hash.each do |key, val| + puts "There are #{val} #{key}(s) if this nodes subtree" + end + end + + def get_type(tag) + if match = tag.match(/<([a-z]*\d*)\W/) + match.captures[0] + end + end + + def node_attributes(node) + if attributes = get_attibutes(node.type) + p attributes + end + end + + def get_attibutes(tag) + if match = tag.match(/<[a-z]*\d*(.*)>/) + match.captures[0] + end + end + +end + +# dom = DomTree.new +# file = File.open("test.html", "rb") +# contents = file.read +# file.close +# dom.build_tree(contents) +# render = NodeRenderer(dom.document) diff --git a/tree_searcher.rb b/tree_searcher.rb new file mode 100644 index 0000000..cb28e31 --- /dev/null +++ b/tree_searcher.rb @@ -0,0 +1,93 @@ +require_relative "dom_tree" +require_relative "node_renderer" + +class TreeSearcher + + def initialize(tree) + @tree = tree + end + + def search_by(attribute, text) + send(attribute, text) + end + + def class(text) + queue = [@tree] + matching_nodes = [] + while node = queue.pop + if classes = get_class(node.type) + classes.each do |cls| + matching_nodes << node if cls == text + end + end + if children = node.children + children.each { |child| queue << child} + end + end + matching_nodes + end + + def get_class(str) + if pattern = str.match(/class\s?=\s?"(.*?)"/) + pattern.captures[0].split + end + end + + def id(text) + queue = [@tree] + matching_nodes = [] + while node = queue.pop + matching_nodes << node if text == get_id(node.type) + if children = node.children + children.each { |child| queue << child} + end + end + matching_nodes + end + + def get_id(str) + if pattern = str.match(/id\s?=\s?"(.*?)"/) + pattern.captures[0] + end + end + + def name(text) + queue = [@tree] + matching_nodes = [] + while node = queue.pop + matching_nodes << node if text == get_name(node.type) + if children = node.children + children.each { |child| queue << child} + end + end + matching_nodes + end + + def get_name(str) + if pattern = str.match(/name\s?=\s?"(.*?)"/) + pattern.captures[0] + end + end + + def text(text) + queue = [@tree] + matching_nodes = [] + while node = queue.pop + matching_nodes << node if text == node.type + if children = node.children + children.each { |child| queue << child} + end + end + matching_nodes + end + +end + +dom = DomTree.new +file = File.open("test.html", "rb") +contents = file.read +file.close +dom.build_tree(contents) +searcher = TreeSearcher.new(dom.document) +node = searcher.search_by(:class, "top-div")[0] +NodeRenderer.new(dom.document).render(node) diff --git a/warmups.rb b/warmups.rb new file mode 100644 index 0000000..146272f --- /dev/null +++ b/warmups.rb @@ -0,0 +1,56 @@ + +class DomTree + attr_reader :tag + + def initialize(tag) + @tag = tag + end + + def type + if match = tag.match(/<([a-z]*\d*)\W/) + match.captures[0] + end + end + + def classes + @tag.match(/class\s?=\s?'(.*?)'/).captures[0].split if tag.match(/class\s?=\s?'(.*?)'/) + end + + def id + @tag.match(/id\s?=\s?'(.*?)'/).captures[0] if tag.match(/id\s?=\s?'(.*?)'/) + end + + def name + @tag.match(/name\s?=\s?'(.*?)'/).captures[0] if tag.match(/name\s?=\s?'(.*?)'/) + end + + def title + @tag.match(/title\s?=\s?'(.*?)'/).captures[0] if tag.match(/title\s?=\s?'(.*?)'/) + end + + def src + @tag.match(/src\s?=\s?'(.*?)'/).captures[0] if tag.match(/src\s?=\s?'(.*?)'/) + end + +end + +par = DomTree.new("

") +d = DomTree.new("

") +i = DomTree.new("") + +p par.type +p par.classes +p par.id +p par.name + +p d.type +p d.classes +p d.id +p d.name + +p i.type +p i.classes +p i.id +p i.name +p i.src +p i.title From a23ba3e33519e6ce1c14f36255ca8460d06d7c2b Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Mon, 18 Jul 2016 12:11:17 -0400 Subject: [PATCH 13/22] updated dom parser --- dom_tree.rb | 6 ++++-- node_renderer.rb | 5 +++-- tree_searcher.rb | 12 +++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/dom_tree.rb b/dom_tree.rb index 3b1aaa2..8640b01 100644 --- a/dom_tree.rb +++ b/dom_tree.rb @@ -17,7 +17,7 @@ def initialize end def parse_string(str) - str.scan(/<.*?>|[\w\p{P}\s]*/).map(&:strip).reject(&:empty?) + str.scan(/<.*?>|[[a-zA-Z]\p{P}\s]*/).map(&:strip).reject(&:empty?) end def build_tree(str) @@ -36,11 +36,13 @@ def build_tree(str) end def render + file = File.open('output.html', 'w') queue = [@document] while item = queue.shift item.children.reverse_each { |type| queue.unshift(type) } if item.children - p "#{" " * item.depth}#{item.type}" + file << "#{" " * item.depth}#{item.type}\n" end + file.close end end diff --git a/node_renderer.rb b/node_renderer.rb index 6ba0b0f..bbc64e8 100644 --- a/node_renderer.rb +++ b/node_renderer.rb @@ -4,7 +4,8 @@ def initialize(tree) @tree = tree end - def render(node = @tree) + def render(node) + node = @tree if node == nil nodes_below(node) node_type_count(node) node_attributes(node) @@ -43,7 +44,7 @@ def node_type_count(node) end end type_hash.each do |key, val| - puts "There are #{val} #{key}(s) if this nodes subtree" + puts "There are #{val} #{key}(s) in this nodes subtree" end end diff --git a/tree_searcher.rb b/tree_searcher.rb index cb28e31..7a5250b 100644 --- a/tree_searcher.rb +++ b/tree_searcher.rb @@ -88,6 +88,12 @@ def text(text) contents = file.read file.close dom.build_tree(contents) -searcher = TreeSearcher.new(dom.document) -node = searcher.search_by(:class, "top-div")[0] -NodeRenderer.new(dom.document).render(node) +TreeSearcher.new(dom.document) +#node = searcher.search_by(:class, "top-div")[0] +#NodeRenderer.new(dom.document).render(nil) +dom.render + +def is_pangram?(str) + str_hash = str.each_with_object({}){ |letter, obj| obj[letter] = true} + ('a'..'z').all? { |letter| str_hash.has_key?(letter) } +end From 987fd0de82abad4cb24fc7fe918cdf24348f95f4 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Mon, 18 Jul 2016 15:39:03 -0400 Subject: [PATCH 14/22] refactored dom_tree --- dom_tree.rb | 115 ++++++++++++++++++++--------------------------- node.rb | 1 + node_renderer.rb | 66 ++++++++++++++------------- output.html | 75 +++++++++++++++++++++++++++++++ test.html | 2 +- tree_searcher.rb | 93 +++++++++++--------------------------- 6 files changed, 185 insertions(+), 167 deletions(-) create mode 100644 node.rb create mode 100644 output.html diff --git a/dom_tree.rb b/dom_tree.rb index 8640b01..7256b7e 100644 --- a/dom_tree.rb +++ b/dom_tree.rb @@ -1,19 +1,8 @@ -#we'll have nodes -#nodes like type -#no attributes in example -#we're going to need a tree -#we'll feed it a string and it will break ever differnt type -# to it's own node and add to the tree. -#outputter method that prints the tree. - - -Node = Struct.new(:type, :children, :parent, :depth ) - class DomTree attr_reader :string, :document def initialize - @document = Node.new("document head", [], nil, 0) + @document = Node.new("document head", nil, 0, []) end def parse_string(str) @@ -21,71 +10,65 @@ def parse_string(str) end def build_tree(str) - parsed_string = parse_string(str) + parsed_html = parse_string(str) top = @document - parsed_string.each do |item| - if item[0] == "<" && item[1] != "/" - top.children << top = Node.new(item, [], top, top.depth + 1) - elsif item[0] == "<" && item[1] == "/" - top.children << Node.new(item, nil, top, top.depth) - top = top.parent - else - top.children << Node.new(item, nil, top, top.depth + 1) - end + parsed_html.each do |item| + node = build_node(item) + top = add_node(node, top) end end - def render - file = File.open('output.html', 'w') - queue = [@document] - while item = queue.shift - item.children.reverse_each { |type| queue.unshift(type) } if item.children - file << "#{" " * item.depth}#{item.type}\n" + def add_node(node, top) + if opening_tag?(node.type) + add_opening_tag(node, top) + else + add_other_tag(node, top) end - file.close end -end - -# dom = DomTree.new -# file = File.open("test.html", "rb") -# contents = file.read -# file.close -# dom.build_tree(contents) -# dom.render + def add_opening_tag(node, top) + top.children << node + node.children, node.parent, node.depth = + [], top, (top.depth + 1) + node + end -# "
-# div text before -#

-# p text -#

-#
-# more div text -#
-# div text after -#
" + def add_other_tag(node, top) + if closing_tag?(node.type) + top.parent + else + top.children << node + node.parent, node.depth = top, (top.depth + 1) + top + end + end + def opening_tag?(item) + item[0] == "<" && item[1] != "/" + end + def closing_tag?(item) + item[0] == "<" && item[1] == "/" + end + def build_node(type) + Node.new(type) + end -# # The HTML string version -# # You could read it in from a file instead if so inclined, -# # which would give you newline characters too -# html_string = "
div text before

p text

more div text
div text after
" + def print_to_file + file = File.open('output.html', 'w') + render(@document, file) + file.close + end -# # Now pull that string into a simple data structure -# data_structure = parser_script( html_string ) + def render(top, file) + file << "#{" " * top.depth}#{top.type}\n" + top.children.each { |element| render(element, file) } if top.children + file << "#{" " * top.depth}#{make_closing(top.type)}>\n" if opening_tag?(top.type) + end -# # Finally, output the string again. -# # It doesn't have to have pretty spacing like it does here... -# outputter( data_structure ) -# #
-# # div text before -# #

-# # p text -# #

-# #
-# # more div text -# #
-# # div text after -# #
+ def make_closing(tag) + match = tag.match(/<(\w*\d*)/).to_s + match.insert(1, "/") + end +end diff --git a/node.rb b/node.rb new file mode 100644 index 0000000..93700bd --- /dev/null +++ b/node.rb @@ -0,0 +1 @@ +Node = Struct.new(:type, :parent, :depth, :children ) diff --git a/node_renderer.rb b/node_renderer.rb index bbc64e8..2ba5ad6 100644 --- a/node_renderer.rb +++ b/node_renderer.rb @@ -12,42 +12,51 @@ def render(node) end def nodes_below(node) - queue = [node] + stack = [node] count = 0 - while item = queue.pop - if children = item.children - children.each do |child| - queue << child - count += 1 - end - end + while item = stack.pop + count += item.children.length if item.children + stack += add_children_to_stack(item) end p "There are #{count} children in this nodes subtree" end def node_type_count(node) - queue = [node] + stack = [node] type_hash = Hash.new(0) - while item = queue.pop + while item = stack.pop match = get_type(item.type) - if match != "" && item != node - if match == nil - type_hash["text"] += 1 - else - type_hash[match] += 1 - end - end - if children = item.children - children.each do |child| - queue << child - end - end + type_hash = update_hash(match, type_hash, item, node) + stack += add_children_to_stack(item) end - type_hash.each do |key, val| + print_hash(type_hash) + end + + def add_children_to_stack(item) + stack = [] + if children = item.children + children.each { |child| stack << child } + end + stack + end + + def print_hash(hash) + hash.each do |key, val| puts "There are #{val} #{key}(s) in this nodes subtree" end end + def update_hash(match, hash, item, node) + if blank_or_star_node?(match, item, node) + match == nil ? hash["text"] += 1 : hash[match] += 1 + end + hash + end + + def blank_or_star_node?(match, item, node) + match != "" && item != node + end + def get_type(tag) if match = tag.match(/<([a-z]*\d*)\W/) match.captures[0] @@ -55,22 +64,15 @@ def get_type(tag) end def node_attributes(node) - if attributes = get_attibutes(node.type) + if attributes = get_node_attr(node.type) p attributes end end - def get_attibutes(tag) + def get_node_attr(tag) if match = tag.match(/<[a-z]*\d*(.*)>/) match.captures[0] end end end - -# dom = DomTree.new -# file = File.open("test.html", "rb") -# contents = file.read -# file.close -# dom.build_tree(contents) -# render = NodeRenderer(dom.document) diff --git a/output.html b/output.html new file mode 100644 index 0000000..0a5d9bf --- /dev/null +++ b/output.html @@ -0,0 +1,75 @@ +document head + + + + + This is a test page + + + +
+ I'm an outer div!!! +
+ I'm an inner div!!! I might just + + emphasize + + some text. +
+ I am EVEN MORE TEXT for the SAME div!!! +
+
+
+

+ Welcome to the test doc! +

+

+ This document contains data +

+
+
    + Here is the data: +
  • + Four list items +
  • +
  • + One unordered list +
  • +
  • + One h +
  • +
  • + One h +
  • +
  • + One header +
  • +
  • + One main +
  • +
  • + One body +
  • +
  • + One html +
  • +
  • + One title +
  • +
  • + One head +
  • +
  • + One doctype +
  • +
  • + Two divs +
  • +
  • + And infinite fun! +
  • +
+
+ + + diff --git a/test.html b/test.html index 6bc7dfd..1348ad8 100644 --- a/test.html +++ b/test.html @@ -26,7 +26,7 @@

Here is the data:
  • Four list items
  • One unordered list
  • -
  • One h1
  • +
  • One h1
  • One h2
  • One header
  • One main
  • diff --git a/tree_searcher.rb b/tree_searcher.rb index 7a5250b..15c283f 100644 --- a/tree_searcher.rb +++ b/tree_searcher.rb @@ -1,3 +1,4 @@ +require_relative "node" require_relative "dom_tree" require_relative "node_renderer" @@ -8,77 +9,38 @@ def initialize(tree) end def search_by(attribute, text) - send(attribute, text) - end - - def class(text) - queue = [@tree] + stack = [@tree] matching_nodes = [] - while node = queue.pop - if classes = get_class(node.type) - classes.each do |cls| - matching_nodes << node if cls == text - end - end - if children = node.children - children.each { |child| queue << child} - end + while node = stack.pop + matching_nodes = match_attributes(node, attribute, text, + matching_nodes) + stack += add_children_to_stack(node) end matching_nodes end - def get_class(str) - if pattern = str.match(/class\s?=\s?"(.*?)"/) - pattern.captures[0].split - end - end - - def id(text) - queue = [@tree] - matching_nodes = [] - while node = queue.pop - matching_nodes << node if text == get_id(node.type) - if children = node.children - children.each { |child| queue << child} - end + def add_children_to_stack(node) + stack = [] + if children = node.children + children.each { |child| stack << child} end - matching_nodes + stack end - def get_id(str) - if pattern = str.match(/id\s?=\s?"(.*?)"/) - pattern.captures[0] + def match_attributes(node, attribute, text, arr) + if att = get_attribute(node.type, attribute.to_s) + att.each { |item| arr << node if item == text } + elsif text == node.type + arr << node end + arr end - def name(text) - queue = [@tree] - matching_nodes = [] - while node = queue.pop - matching_nodes << node if text == get_name(node.type) - if children = node.children - children.each { |child| queue << child} - end + def get_attribute(str, attribute) + if pattern = str.match(/#{attribute}\s?=\s?"(.*?)"/) + return pattern.captures[0].split if attribute == "class" + pattern.captures end - matching_nodes - end - - def get_name(str) - if pattern = str.match(/name\s?=\s?"(.*?)"/) - pattern.captures[0] - end - end - - def text(text) - queue = [@tree] - matching_nodes = [] - while node = queue.pop - matching_nodes << node if text == node.type - if children = node.children - children.each { |child| queue << child} - end - end - matching_nodes end end @@ -88,12 +50,7 @@ def text(text) contents = file.read file.close dom.build_tree(contents) -TreeSearcher.new(dom.document) -#node = searcher.search_by(:class, "top-div")[0] -#NodeRenderer.new(dom.document).render(nil) -dom.render - -def is_pangram?(str) - str_hash = str.each_with_object({}){ |letter, obj| obj[letter] = true} - ('a'..'z').all? { |letter| str_hash.has_key?(letter) } -end +searcher = TreeSearcher.new(dom.document) +searcher.search_by(:class, "funky")[0] +NodeRenderer.new(dom.document).render(nil) +dom.print_to_file From 91f7ac17024c87b6bcee47120496123fb7f9566e Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Mon, 18 Jul 2016 16:00:16 -0400 Subject: [PATCH 15/22] updated render method --- output.html | 75 ------------------------------------------------ tree_searcher.rb | 10 +++---- 2 files changed, 5 insertions(+), 80 deletions(-) delete mode 100644 output.html diff --git a/output.html b/output.html deleted file mode 100644 index 0a5d9bf..0000000 --- a/output.html +++ /dev/null @@ -1,75 +0,0 @@ -document head - - - - - This is a test page - - - -
    - I'm an outer div!!! -
    - I'm an inner div!!! I might just - - emphasize - - some text. -
    - I am EVEN MORE TEXT for the SAME div!!! -
    -
    -
    -

    - Welcome to the test doc! -

    -

    - This document contains data -

    -
    -
      - Here is the data: -
    • - Four list items -
    • -
    • - One unordered list -
    • -
    • - One h -
    • -
    • - One h -
    • -
    • - One header -
    • -
    • - One main -
    • -
    • - One body -
    • -
    • - One html -
    • -
    • - One title -
    • -
    • - One head -
    • -
    • - One doctype -
    • -
    • - Two divs -
    • -
    • - And infinite fun! -
    • -
    -
    - - - diff --git a/tree_searcher.rb b/tree_searcher.rb index 15c283f..a9ef647 100644 --- a/tree_searcher.rb +++ b/tree_searcher.rb @@ -12,8 +12,7 @@ def search_by(attribute, text) stack = [@tree] matching_nodes = [] while node = stack.pop - matching_nodes = match_attributes(node, attribute, text, - matching_nodes) + matching_nodes += match_attributes(node, attribute, text) stack += add_children_to_stack(node) end matching_nodes @@ -27,7 +26,8 @@ def add_children_to_stack(node) stack end - def match_attributes(node, attribute, text, arr) + def match_attributes(node, attribute, text) + arr = [] if att = get_attribute(node.type, attribute.to_s) att.each { |item| arr << node if item == text } elsif text == node.type @@ -51,6 +51,6 @@ def get_attribute(str, attribute) file.close dom.build_tree(contents) searcher = TreeSearcher.new(dom.document) -searcher.search_by(:class, "funky")[0] -NodeRenderer.new(dom.document).render(nil) +node = searcher.search_by(:class, "bold")[1] +NodeRenderer.new(dom.document).render(node) dom.print_to_file From 71186fbb95462c0a19a797e5a4ae32c856d60bc1 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Mon, 18 Jul 2016 16:59:30 -0400 Subject: [PATCH 16/22] added rspec --- .rspec | 2 + dom_tree.rb => lib/dom_tree.rb | 2 +- node.rb => lib/node.rb | 0 node_renderer.rb => lib/node_renderer.rb | 0 tree_searcher.rb => lib/tree_searcher.rb | 2 +- output.html | 75 ++++++++++++++++++ spec/dom_tree_spec.rb | 63 +++++++++++++++ spec/node_render_spec.rb | 21 +++++ spec/spec_helper.rb | 97 ++++++++++++++++++++++++ spec/tree_searcher_spec.rb | 20 +++++ spec/warmups_spec.rb | 1 + test.html | 2 +- 12 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 .rspec rename dom_tree.rb => lib/dom_tree.rb (95%) rename node.rb => lib/node.rb (100%) rename node_renderer.rb => lib/node_renderer.rb (100%) rename tree_searcher.rb => lib/tree_searcher.rb (96%) create mode 100644 output.html create mode 100755 spec/dom_tree_spec.rb create mode 100755 spec/node_render_spec.rb create mode 100755 spec/spec_helper.rb create mode 100755 spec/tree_searcher_spec.rb create mode 100755 spec/warmups_spec.rb diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..83e16f8 --- /dev/null +++ b/.rspec @@ -0,0 +1,2 @@ +--color +--require spec_helper diff --git a/dom_tree.rb b/lib/dom_tree.rb similarity index 95% rename from dom_tree.rb rename to lib/dom_tree.rb index 7256b7e..0c6a31a 100644 --- a/dom_tree.rb +++ b/lib/dom_tree.rb @@ -6,7 +6,7 @@ def initialize end def parse_string(str) - str.scan(/<.*?>|[[a-zA-Z]\p{P}\s]*/).map(&:strip).reject(&:empty?) + str.scan(/<.*?>|[[a-zA-Z]\d\p{P}\s]*/).map(&:strip).reject(&:empty?) end def build_tree(str) diff --git a/node.rb b/lib/node.rb similarity index 100% rename from node.rb rename to lib/node.rb diff --git a/node_renderer.rb b/lib/node_renderer.rb similarity index 100% rename from node_renderer.rb rename to lib/node_renderer.rb diff --git a/tree_searcher.rb b/lib/tree_searcher.rb similarity index 96% rename from tree_searcher.rb rename to lib/tree_searcher.rb index a9ef647..bbe8e1c 100644 --- a/tree_searcher.rb +++ b/lib/tree_searcher.rb @@ -51,6 +51,6 @@ def get_attribute(str, attribute) file.close dom.build_tree(contents) searcher = TreeSearcher.new(dom.document) -node = searcher.search_by(:class, "bold")[1] +node = searcher.search_by(:class, "top-div")[0] NodeRenderer.new(dom.document).render(node) dom.print_to_file diff --git a/output.html b/output.html new file mode 100644 index 0000000..55d5ad2 --- /dev/null +++ b/output.html @@ -0,0 +1,75 @@ +document head + + + + + This is a test page + + + +
    + I'm an outer div!!! +
    + I'm an inner div!!! I might just + + emphasize + + some text. +
    + I am EVEN MORE TEXT for the SAME div!!! +
    +
    +
    +

    + Welcome to the test doc! +

    +

    + This document contains data +

    +
    +
      + Here is the data: +
    • + Four list items +
    • +
    • + One unordered list +
    • +
    • + One h1 +
    • +
    • + One h2 +
    • +
    • + One header +
    • +
    • + One main +
    • +
    • + One body +
    • +
    • + One html +
    • +
    • + One title +
    • +
    • + One head +
    • +
    • + One doctype +
    • +
    • + Two divs +
    • +
    • + And infinite fun! +
    • +
    +
    + + + diff --git a/spec/dom_tree_spec.rb b/spec/dom_tree_spec.rb new file mode 100755 index 0000000..0fbd347 --- /dev/null +++ b/spec/dom_tree_spec.rb @@ -0,0 +1,63 @@ + +require 'dom_tree' + +describe DomTree do + + let(:dom_tree) { DomTree.new } + let(:dom_tree_error) { DomTree.new("arguments")} + + + + describe '#initialize' do + it 'returns an instance of DomTree' do + expect(dom_tree).to be_an_instance_of(DomTree) + end + + it 'will return an error if initalized with an argument' do + expect {dom_tree_error}.to raise_error(ArgumentError) + end + + it 'creates a document node with the type set to document head' do + expect(dom_tree.document.type).to eq("document head") + end + end + + + describe '#parse_string' do + let (:test_html) { "
    + div text before +

    + p text +

    +
    + more div text +
    + div text after +
    " + } + + it 'takes a string as an argument' do + expect { dom_tree.parse_string("test") }.to_not raise_error + end + + it 'removes html formatting and retuns an array of strings for each element' do + expect(dom_tree.parse_string(test_html)).to eq(["
    ", "div text before", "

    ", "p text", "

    ", "
    ", "more div text", "
    ", "div text after", "
    "]) + end + + end + + # describe '#build_tree' do + + + # end + + + + + # end + + + + + +end \ No newline at end of file diff --git a/spec/node_render_spec.rb b/spec/node_render_spec.rb new file mode 100755 index 0000000..4be0172 --- /dev/null +++ b/spec/node_render_spec.rb @@ -0,0 +1,21 @@ +require 'spec_helper' +require 'node_render' + +describe NodeRenderer do + let(:node_render) { NodeRenderer.new("tree")} + + + describe '#intialize' do + it 'returns an instance of NodeRenderer' do + expect(node_render).to be_an_instance_of(NodeRenderer) + end + + it 'takes one argument' do + expect{node_render}.to_not raise_error + end + + + end + + +end \ No newline at end of file diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100755 index 0000000..ef48f3e --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,97 @@ + +# This file was generated by the `rspec --init` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# The `.rspec` file also contains a few flags that are not defaults but that +# users commonly want. +# +# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. + mocks.verify_partial_doubles = true + end + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # These two settings work together to allow you to limit a spec run + # to individual examples or groups you care about by tagging them with + # `:focus` metadata. When nothing is tagged with `:focus`, all examples + # get run. + config.filter_run :focus + config.run_all_when_everything_filtered = true + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/ + # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ + # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode + config.disable_monkey_patching! + + # This setting enables warnings. It's recommended, but in some cases may + # be too noisy due to issues in dependencies. + config.warnings = true + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = 'doc' + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. + Kernel.srand config.seed +=end +end diff --git a/spec/tree_searcher_spec.rb b/spec/tree_searcher_spec.rb new file mode 100755 index 0000000..c57e977 --- /dev/null +++ b/spec/tree_searcher_spec.rb @@ -0,0 +1,20 @@ +require 'tree_searcher' +require 'node_render' +require 'dom_tree' + +describe TreeSearcher do + let(:tree_search) {TreeSearcher.new("tree")} + + describe '#initialize' do + it 'returns an instance of TreeSearcher' do + expect(tree_search).to be_an_instance_of(TreeSearcher) + end + + + end + + + + + +end \ No newline at end of file diff --git a/spec/warmups_spec.rb b/spec/warmups_spec.rb new file mode 100755 index 0000000..335cafc --- /dev/null +++ b/spec/warmups_spec.rb @@ -0,0 +1 @@ +require 'spec_helper' \ No newline at end of file diff --git a/test.html b/test.html index 1348ad8..03da881 100644 --- a/test.html +++ b/test.html @@ -6,7 +6,7 @@ -
    +
    I'm an outer div!!!
    I'm an inner div!!! I might just emphasize some text. From 019c9183ec25a21d07b9bbf20faa5ecaa0c2536e Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Mon, 18 Jul 2016 16:21:03 -0500 Subject: [PATCH 17/22] fixed issues with tests --- .rspec | 1 + lib/dom_tree.rb | 2 ++ lib/node_renderer.rb | 1 + spec/dom_tree_spec.rb | 1 + spec/{node_render_spec.rb => node_renderer_spec.rb} | 6 ++---- spec/tree_searcher_spec.rb | 2 -- 6 files changed, 7 insertions(+), 6 deletions(-) rename spec/{node_render_spec.rb => node_renderer_spec.rb} (87%) diff --git a/.rspec b/.rspec index 83e16f8..8670fa1 100644 --- a/.rspec +++ b/.rspec @@ -1,2 +1,3 @@ +--format doc --color --require spec_helper diff --git a/lib/dom_tree.rb b/lib/dom_tree.rb index 0c6a31a..46d2a8d 100644 --- a/lib/dom_tree.rb +++ b/lib/dom_tree.rb @@ -1,3 +1,5 @@ +require 'node' + class DomTree attr_reader :string, :document diff --git a/lib/node_renderer.rb b/lib/node_renderer.rb index 2ba5ad6..a83deac 100644 --- a/lib/node_renderer.rb +++ b/lib/node_renderer.rb @@ -1,3 +1,4 @@ + class NodeRenderer def initialize(tree) diff --git a/spec/dom_tree_spec.rb b/spec/dom_tree_spec.rb index 0fbd347..ef64172 100755 --- a/spec/dom_tree_spec.rb +++ b/spec/dom_tree_spec.rb @@ -1,4 +1,5 @@ +require 'node' require 'dom_tree' describe DomTree do diff --git a/spec/node_render_spec.rb b/spec/node_renderer_spec.rb similarity index 87% rename from spec/node_render_spec.rb rename to spec/node_renderer_spec.rb index 4be0172..dd6db95 100755 --- a/spec/node_render_spec.rb +++ b/spec/node_renderer_spec.rb @@ -1,5 +1,5 @@ -require 'spec_helper' -require 'node_render' + +require 'node_renderer' describe NodeRenderer do let(:node_render) { NodeRenderer.new("tree")} @@ -14,8 +14,6 @@ expect{node_render}.to_not raise_error end - end - end \ No newline at end of file diff --git a/spec/tree_searcher_spec.rb b/spec/tree_searcher_spec.rb index c57e977..f8d1cd6 100755 --- a/spec/tree_searcher_spec.rb +++ b/spec/tree_searcher_spec.rb @@ -1,6 +1,4 @@ require 'tree_searcher' -require 'node_render' -require 'dom_tree' describe TreeSearcher do let(:tree_search) {TreeSearcher.new("tree")} From ebf59d55b45da3f3197c26f7a6cc6c648e12ca52 Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Mon, 18 Jul 2016 16:27:24 -0500 Subject: [PATCH 18/22] added a few more tests to build_tree --- spec/dom_tree_spec.rb | 48 ++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/spec/dom_tree_spec.rb b/spec/dom_tree_spec.rb index ef64172..0d2e751 100755 --- a/spec/dom_tree_spec.rb +++ b/spec/dom_tree_spec.rb @@ -6,9 +6,18 @@ let(:dom_tree) { DomTree.new } let(:dom_tree_error) { DomTree.new("arguments")} - - - + let (:test_html) { "
    + div text before +

    + p text +

    +
    + more div text +
    + div text after +
    " + } + describe '#initialize' do it 'returns an instance of DomTree' do expect(dom_tree).to be_an_instance_of(DomTree) @@ -25,17 +34,7 @@ describe '#parse_string' do - let (:test_html) { "
    - div text before -

    - p text -

    -
    - more div text -
    - div text after -
    " - } + it 'takes a string as an argument' do expect { dom_tree.parse_string("test") }.to_not raise_error @@ -47,18 +46,15 @@ end - # describe '#build_tree' do - - - # end - - - - - # end - - - + describe '#build_tree' do + it 'takes a string as an argument' do + expect { dom_tree.build_tree("test") }.to_not raise_error + end + it 'after building a tree the head node has correct number of children' do + dom_tree.build_tree(test_html) + expect(dom_tree.document.children.length).to eq(1) + end + end end \ No newline at end of file From 78059dc2a6bd3ac993957675de3bf2483ab1ffbc Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Mon, 18 Jul 2016 16:46:00 -0500 Subject: [PATCH 19/22] added tests for treesearcher --- spec/tree_searcher_spec.rb | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/spec/tree_searcher_spec.rb b/spec/tree_searcher_spec.rb index f8d1cd6..4832dba 100755 --- a/spec/tree_searcher_spec.rb +++ b/spec/tree_searcher_spec.rb @@ -2,17 +2,39 @@ describe TreeSearcher do let(:tree_search) {TreeSearcher.new("tree")} + let(:dom_tree) { DomTree.new } + let (:test_html) { "
    + div text before +

    + p text +

    +
    + more div text +
    + div text after +
    " + } + + describe '#initialize' do it 'returns an instance of TreeSearcher' do expect(tree_search).to be_an_instance_of(TreeSearcher) end - end + describe '#search_by' do + it 'returns all matching nodes' do + dom_tree.build_tree(test_html) + search = TreeSearcher.new(dom_tree.document) + expect(search.search_by(:class, 'test')[0]).to be_a(Node) + end + end + + end \ No newline at end of file From 0e7089b098512ca28bfb3962dc0b87d50bc85903 Mon Sep 17 00:00:00 2001 From: chrisgoodson Date: Mon, 18 Jul 2016 16:59:42 -0500 Subject: [PATCH 20/22] monkey patching node class --- lib/node.rb | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/node.rb b/lib/node.rb index 93700bd..5b9def9 100644 --- a/lib/node.rb +++ b/lib/node.rb @@ -1 +1,16 @@ -Node = Struct.new(:type, :parent, :depth, :children ) +Node = Struct.new(:type, :parent, :depth, :children, :attributes) + + +class Node + + def self.build_attributes_hash + self.attributes << self. + + + end + + + + + +end \ No newline at end of file From 01a68173d6595a0f72bdd69458d6dbd76a2317ca Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Mon, 18 Jul 2016 18:36:05 -0400 Subject: [PATCH 21/22] completed build_attributes_hash --- lib/dom_tree.rb | 2 -- lib/node.rb | 28 ++++++++++++---------------- lib/tree_searcher.rb | 6 ++++-- output.html | 2 +- test.html | 2 +- 5 files changed, 18 insertions(+), 22 deletions(-) diff --git a/lib/dom_tree.rb b/lib/dom_tree.rb index 46d2a8d..0c6a31a 100644 --- a/lib/dom_tree.rb +++ b/lib/dom_tree.rb @@ -1,5 +1,3 @@ -require 'node' - class DomTree attr_reader :string, :document diff --git a/lib/node.rb b/lib/node.rb index 5b9def9..80915ca 100644 --- a/lib/node.rb +++ b/lib/node.rb @@ -1,16 +1,12 @@ -Node = Struct.new(:type, :parent, :depth, :children, :attributes) - - -class Node - - def self.build_attributes_hash - self.attributes << self. - - - end - - - - - -end \ No newline at end of file +Node = Struct.new(:type, :parent, :depth, :children, :attributes) do + + def build_attributes_hash + att_hash = {} + attribute_pairs = self.type.scan((/([[a-zA-Z]\d\p{p}]*)\s*=\s*\p{P}([\w\p{Pd}]*)/)) + solo = self.type.scan(/\s(?]/).flatten + attribute_pairs.each { |item| att_hash[item[0]] = item[1] } + self.attributes = att_hash + solo.each { |item| att_hash[item] = true } + end + +end diff --git a/lib/tree_searcher.rb b/lib/tree_searcher.rb index bbe8e1c..9f434cd 100644 --- a/lib/tree_searcher.rb +++ b/lib/tree_searcher.rb @@ -51,6 +51,8 @@ def get_attribute(str, attribute) file.close dom.build_tree(contents) searcher = TreeSearcher.new(dom.document) -node = searcher.search_by(:class, "top-div")[0] -NodeRenderer.new(dom.document).render(node) +node = searcher.search_by(:class, "top-div")[0] +node.build_attributes_hash +p node.attributes +#NodeRenderer.new(dom.document).render(node) dom.print_to_file diff --git a/output.html b/output.html index 55d5ad2..c0e3732 100644 --- a/output.html +++ b/output.html @@ -7,7 +7,7 @@ -
    +
    I'm an outer div!!!
    I'm an inner div!!! I might just diff --git a/test.html b/test.html index 03da881..17c779c 100644 --- a/test.html +++ b/test.html @@ -6,7 +6,7 @@ -
    +
    I'm an outer div!!!
    I'm an inner div!!! I might just emphasize some text. From ba88fca1255482e1846d2277f5f701237d5a3377 Mon Sep 17 00:00:00 2001 From: Graham Turner Date: Mon, 18 Jul 2016 19:39:18 -0400 Subject: [PATCH 22/22] updated node regular expression --- lib/dom_tree.rb | 4 +++- lib/node.rb | 11 +++++++---- lib/node_renderer.rb | 10 +--------- lib/tree_searcher.rb | 4 +--- output.html | 2 +- test.html | 2 +- 6 files changed, 14 insertions(+), 19 deletions(-) diff --git a/lib/dom_tree.rb b/lib/dom_tree.rb index 0c6a31a..98826dc 100644 --- a/lib/dom_tree.rb +++ b/lib/dom_tree.rb @@ -52,7 +52,9 @@ def closing_tag?(item) end def build_node(type) - Node.new(type) + node = Node.new(type) + node.build_attributes_hash if opening_tag?(node.type) + node end def print_to_file diff --git a/lib/node.rb b/lib/node.rb index 80915ca..62ca697 100644 --- a/lib/node.rb +++ b/lib/node.rb @@ -2,11 +2,14 @@ def build_attributes_hash att_hash = {} - attribute_pairs = self.type.scan((/([[a-zA-Z]\d\p{p}]*)\s*=\s*\p{P}([\w\p{Pd}]*)/)) - solo = self.type.scan(/\s(?]/).flatten - attribute_pairs.each { |item| att_hash[item[0]] = item[1] } + attribute_pairs = self.type.scan((/([[a-zA-Z]\d\p{p}]*)\s*=\s*\"([[a-zA-Z]\p{Pd}\s*]*)"/)) + solo = self.type.scan(/[[a-zA-Z]\d\p{p}]*\s*=\s*\"[[a-zA-Z]\p{Pd}\s*]*"|\s(?]/).flatten + attribute_pairs.each do |item| + item[1] = item[1].split(" ") + att_hash[item[0]] = item[1] + end + solo.each { |item| att_hash[item] = true unless item.nil?} self.attributes = att_hash - solo.each { |item| att_hash[item] = true } end end diff --git a/lib/node_renderer.rb b/lib/node_renderer.rb index a83deac..511298f 100644 --- a/lib/node_renderer.rb +++ b/lib/node_renderer.rb @@ -65,15 +65,7 @@ def get_type(tag) end def node_attributes(node) - if attributes = get_node_attr(node.type) - p attributes - end - end - - def get_node_attr(tag) - if match = tag.match(/<[a-z]*\d*(.*)>/) - match.captures[0] - end + p node.attributes end end diff --git a/lib/tree_searcher.rb b/lib/tree_searcher.rb index 9f434cd..99fcdd5 100644 --- a/lib/tree_searcher.rb +++ b/lib/tree_searcher.rb @@ -52,7 +52,5 @@ def get_attribute(str, attribute) dom.build_tree(contents) searcher = TreeSearcher.new(dom.document) node = searcher.search_by(:class, "top-div")[0] -node.build_attributes_hash -p node.attributes -#NodeRenderer.new(dom.document).render(node) +NodeRenderer.new(dom.document).render(node) dom.print_to_file diff --git a/output.html b/output.html index c0e3732..aef2854 100644 --- a/output.html +++ b/output.html @@ -7,7 +7,7 @@ -
    +
    I'm an outer div!!!
    I'm an inner div!!! I might just diff --git a/test.html b/test.html index 17c779c..2ad84a0 100644 --- a/test.html +++ b/test.html @@ -6,7 +6,7 @@ -
    +
    I'm an outer div!!!
    I'm an inner div!!! I might just emphasize some text.