diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..7e3eb5c --- /dev/null +++ b/.rspec @@ -0,0 +1,3 @@ +--color +--require spec_helper +--format doc diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..ce3fe7f --- /dev/null +++ b/Gemfile @@ -0,0 +1,6 @@ +# Gemfile + +source 'https://rubygems.org' + +gem 'rspec', '~> 3.5.0' +gem 'guard-rspec', '~> 4.7.3', require: false \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..3874780 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,65 @@ +GEM + remote: https://rubygems.org/ + specs: + coderay (1.1.1) + diff-lcs (1.3) + ffi (1.9.18) + formatador (0.2.5) + guard (2.14.1) + formatador (>= 0.2.4) + listen (>= 2.7, < 4.0) + lumberjack (~> 1.0) + nenv (~> 0.1) + notiffany (~> 0.0) + pry (>= 0.9.12) + shellany (~> 0.0) + thor (>= 0.18.1) + guard-compat (1.2.1) + guard-rspec (4.7.3) + guard (~> 2.1) + guard-compat (~> 1.1) + rspec (>= 2.99.0, < 4.0) + listen (3.1.5) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + ruby_dep (~> 1.2) + lumberjack (1.0.11) + method_source (0.8.2) + nenv (0.3.0) + notiffany (0.1.1) + nenv (~> 0.1) + shellany (~> 0.0) + pry (0.10.4) + coderay (~> 1.1.0) + method_source (~> 0.8.1) + slop (~> 3.4) + rb-fsevent (0.9.8) + rb-inotify (0.9.8) + ffi (>= 0.5.0) + rspec (3.5.0) + rspec-core (~> 3.5.0) + rspec-expectations (~> 3.5.0) + rspec-mocks (~> 3.5.0) + rspec-core (3.5.4) + rspec-support (~> 3.5.0) + rspec-expectations (3.5.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.5.0) + rspec-mocks (3.5.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.5.0) + rspec-support (3.5.0) + ruby_dep (1.5.0) + shellany (0.0.1) + slop (3.6.0) + thor (0.19.4) + +PLATFORMS + ruby + +DEPENDENCIES + guard-rspec (~> 4.7.3) + rspec (~> 3.5.0) + +BUNDLED WITH + 1.14.4 diff --git a/Guardfile b/Guardfile new file mode 100644 index 0000000..3215f01 --- /dev/null +++ b/Guardfile @@ -0,0 +1,70 @@ +# A sample Guardfile +# More info at https://github.com/guard/guard#readme + +## Uncomment and set this to only include directories you want to watch +# directories %w(app lib config test spec features) \ +# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")} + +## Note: if you are using the `directories` clause above and you are not +## watching the project directory ('.'), then you will want to move +## the Guardfile to a watched dir and symlink it back, e.g. +# +# $ mkdir config +# $ mv Guardfile config/ +# $ ln -s config/Guardfile . +# +# and, you'll have to watch "config/Guardfile" instead of "Guardfile" + +# Note: The cmd option is now required due to the increasing number of ways +# rspec may be run, below are examples of the most common uses. +# * bundler: 'bundle exec rspec' +# * bundler binstubs: 'bin/rspec' +# * spring: 'bin/rspec' (This will use spring if running and you have +# installed the spring binstubs per the docs) +# * zeus: 'zeus rspec' (requires the server to be started separately) +# * 'just' rspec: 'rspec' + +guard :rspec, cmd: "bundle exec rspec" do + require "guard/rspec/dsl" + dsl = Guard::RSpec::Dsl.new(self) + + # Feel free to open issues for suggestions and improvements + + # RSpec files + rspec = dsl.rspec + watch(rspec.spec_helper) { rspec.spec_dir } + watch(rspec.spec_support) { rspec.spec_dir } + watch(rspec.spec_files) + + # Ruby files + ruby = dsl.ruby + dsl.watch_spec_files_for(ruby.lib_files) + + # Rails files + rails = dsl.rails(view_extensions: %w(erb haml slim)) + dsl.watch_spec_files_for(rails.app_files) + dsl.watch_spec_files_for(rails.views) + + watch(rails.controllers) do |m| + [ + rspec.spec.call("routing/#{m[1]}_routing"), + rspec.spec.call("controllers/#{m[1]}_controller"), + rspec.spec.call("acceptance/#{m[1]}") + ] + end + + # Rails config changes + watch(rails.spec_helper) { rspec.spec_dir } + watch(rails.routes) { "#{rspec.spec_dir}/routing" } + watch(rails.app_controller) { "#{rspec.spec_dir}/controllers" } + + # Capybara features specs + watch(rails.view_dirs) { |m| rspec.spec.call("features/#{m[1]}") } + watch(rails.layouts) { |m| rspec.spec.call("features/#{m[1]}") } + + # Turnip features and steps + watch(%r{^spec/acceptance/(.+)\.feature$}) + watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m| + Dir[File.join("**/#{m[1]}.feature")][0] || "spec/acceptance" + end +end diff --git a/README.md b/README.md index eb609bd..17cb21c 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,21 @@ Like leaves on the wind [A data structures, algorithms, file I/O, ruby and regular expression (regex) project from the Viking Code School](http://www.vikingcodeschool.com) + +Worked on by [Roy Chen](https://github.com/roychen25) + +## Getting Started + +To run this program, fork and clone this repository. + +In the cloned directory, run this command: + +``` +ruby example.rb +``` + +The output includes: + +1. Printing information about the root node of the DOM tree created + +2. Rebuilding the DOM tree into its original format diff --git a/example.rb b/example.rb new file mode 100644 index 0000000..cf7171a --- /dev/null +++ b/example.rb @@ -0,0 +1,15 @@ +if $0 == __FILE__ + require_relative './lib/dom_reader' + require_relative './lib/node_renderer' + require_relative './lib/dom_rebuilder' + + dom_reader = DOMReader.new + + tree = dom_reader.build_tree('./test.html') + + node_renderer = NodeRenderer.new(tree) + node_renderer.render + + dom_rebuilder = DOMRebuilder.new(tree) + dom_rebuilder.print_tree +end diff --git a/lib/dom_reader.rb b/lib/dom_reader.rb new file mode 100644 index 0000000..f6aa9b6 --- /dev/null +++ b/lib/dom_reader.rb @@ -0,0 +1,113 @@ +require_relative './dom_tree' + +class DOMReader + def initialize; end + + def build_tree(filename) + # break file into tokens + tokens = tokenize_file(filename) + + # exit if we have no tokens + return nil if tokens.empty? + + # remove doctype declaration, if any + tokens = remove_doctype(tokens) + + # initialize the tree + tree = DOMTree.new + current_node = tree.document + + # set current depth of the tree + current_depth = 0 + + # create tree's root node if it does not exist yet + if tree.document.nil? + token = tokens.shift + tree.document = Node.new(token_type(token), token, current_depth, nil, []) + current_node = tree.document + end + + # process remaining tokens + until tokens.empty? + token = tokens.shift + token_type = token_type(token) + + node = Node.new(token_type, token, current_depth, current_node, []) + current_node.children << node + + case token_type + when :open_tag + current_node = node + current_depth += 1 + when :text + next + when :close_tag + current_node = current_node.parent unless current_node == tree.document + current_depth -= 1 unless current_depth == 0 + when :unknown + # ignore unknown tag for now + next + end + end + + tree + end + + def read_file(filename) + raise "The file to be read does not exist." unless File.exist?(filename) + + File.readlines(filename) + end + + def tokenize_file(filename) + lines = read_file(filename) + tokens = [] + lines.each { |line| tokens << tokenize(line) } + tokens.flatten! + end + + def tokenize(text) + regex = /(<.+?>|[^<>]+|<\/\w+?>)/ + tokens = text.scan(regex).flatten + + tokens + end + + def remove_doctype(tokens) + tokens.delete_at(0) if is_doctype_tag?(tokens[0]) + tokens + end + + def token_type(token) + return :unknown unless token.is_a?(String) + + return :open_tag if is_open_tag?(token) + + return :close_tag if is_close_tag?(token) + + return :text if is_text?(token) + + :unknown + end + + def is_doctype_tag?(text) + regex = //i + !text.match(regex).nil? + end + + def is_open_tag?(text) + # regex = /^<(\w+)>$/ + regex = /^<(\w+)\s*(.+)*>$/ + !text.match(regex).nil? + end + + def is_close_tag?(text) + regex = /^<(\/\w+)>$/ + !text.match(regex).nil? + end + + def is_text?(text) + regex = /^[^<>\/]+$/ + !text.match(regex).nil? + end +end diff --git a/lib/dom_rebuilder.rb b/lib/dom_rebuilder.rb new file mode 100644 index 0000000..076ae5b --- /dev/null +++ b/lib/dom_rebuilder.rb @@ -0,0 +1,30 @@ +class DOMRebuilder + attr_reader :tree + + def initialize(tree = nil) + @tree = tree + end + + # print out the DOM tree using DFS + def print_tree(start_node = nil) + output = "" + + start_node = self.tree.document if start_node.nil? + + stack = [] + stack.push(start_node) + + until stack.empty? + current_node = stack.pop + + output << current_node.content + + # it's important to reverse the child nodes first before + # pushing onto the stack, so that they'll be printed out + # in the right order + current_node.children.reverse.each { |child| stack.push(child) } + end + + puts output + end +end diff --git a/lib/dom_tree.rb b/lib/dom_tree.rb new file mode 100644 index 0000000..4e374cc --- /dev/null +++ b/lib/dom_tree.rb @@ -0,0 +1,50 @@ +Node = Struct.new(:type, :content, :depth, :parent, :children) + +class DOMTree + attr_accessor :document + + def initialize(document = nil) + # root node of tree + @document = document + end + + # methods for traversing tree to be added here + + def num_nodes_below(node = nil) + return 0 if node.nil? || node.children.empty? + + count = 0 + queue = [] + node.children.each { |child| queue << child } + + until queue.empty? + current_node = queue.shift + count += 1 + current_node.children.each { |child| queue << child unless child.nil? } + end + + count + end + + def node_types_below(node = nil) + return {} if node.nil? || node.children.empty? + + node_types = {} + + queue = [] + node.children.each { |child| queue << child } + + until queue.empty? + current_node = queue.shift + current_node.children.each { |child| queue << child unless child.nil? } + + if node_types.keys.include?(current_node.type) + node_types[current_node.type] += 1 + else + node_types[current_node.type] = 1 + end + end + + node_types + end +end diff --git a/lib/node_renderer.rb b/lib/node_renderer.rb new file mode 100644 index 0000000..6492c0a --- /dev/null +++ b/lib/node_renderer.rb @@ -0,0 +1,38 @@ +class NodeRenderer + attr_reader :tree + + def initialize(tree = nil) + @tree = tree + end + + def render(node = nil) + display_data_attributes(node) + display_num_nodes_below(node) + display_node_types_below(node) + end + + def display_data_attributes(node = nil) + node = self.tree.document if node.nil? + + puts "=== Node data attributes ===" + puts "Node type: #{node.type}" + puts "Node content: #{node.content.inspect}" + puts "Node depth: #{node.depth}" + puts "Children: #{node.children.length}\n\n" + end + + def display_num_nodes_below(node = nil) + node = self.tree.document if node.nil? + + puts "=== Number of nodes in subtree(s) below ===" + puts "#{self.tree.num_nodes_below(node)}\n\n" + end + + def display_node_types_below(node = nil) + node = self.tree.document if node.nil? + node_types = self.tree.node_types_below(node) + + puts "=== Node types in subtree(s) below ===" + puts node_types + end +end diff --git a/lib/tree_searcher.rb b/lib/tree_searcher.rb new file mode 100644 index 0000000..191af98 --- /dev/null +++ b/lib/tree_searcher.rb @@ -0,0 +1,60 @@ +class TreeSearcher + attr_reader :tree + + def initialize(tree = nil) + @tree = tree + end + + def search_by(attribute, value) + case attribute + when :name + regex = /^<#{value}.*>$/i + node_type = value.start_with?("/") ? :close_tag : :open_tag + when :text + regex = /#{value}/i + node_type = :text + when :id + regex = /id\s*=\s*['"]#{value}['"]/i + node_type = :open_tag + when :class + regex = /class\s*=\s*['"].*?#{value}.*?['"]/i + node_type = :open_tag + else + puts "Invalid attribute to search for." + return nil + end + + find_nodes_by(node_type, regex, value) + end + + def search_descendents(start_node, attribute, value) + nodes = search_by(attribute, value) + + nodes.select { |node| node.depth > start_node.depth } + end + + def search_ancestors(start_node, attribute, value) + nodes = search_by(attribute, value) + + nodes.select { |node| node.depth < start_node.depth } + end + + private + + def find_nodes_by(node_type, regex, value) + # perform BFS on tree to find matching nodes + collection = [] + queue = [] + + queue << self.tree.document + until queue.empty? + current_node = queue.shift + + collection << current_node if current_node.content.match(regex) && current_node.type == node_type + + current_node.children.each { |child| queue << child unless child.nil? } + end + + collection + end +end diff --git a/spec/dom_reader_spec.rb b/spec/dom_reader_spec.rb new file mode 100644 index 0000000..0130947 --- /dev/null +++ b/spec/dom_reader_spec.rb @@ -0,0 +1,166 @@ +# spec/dom_reader_spec.rb + +require 'dom_reader' + +describe "DOMReader" do + let(:dom_reader) { DOMReader.new } + + describe "#initialize" do + it "creates an instance of DOMReader" do + expect(dom_reader).to be_a(DOMReader) + end + end + + describe "#build_tree" do + it "returns a tree of Nodes built from the input filename" do + filename = './test.html' + contents = ["\n", "\n", "
\n", "text
\n", "\n", "\n"] + + allow(File).to receive(:readlines).with(filename).and_return(contents) + + tree = dom_reader.build_tree(filename) + expect(tree).to be_a(DOMTree) + expect(tree.document).to be_a(Node) + expect(tree.document.type).to eq(:text) + expect(tree.document.children.first.type).to eq(:open_tag) + expect(tree.document.children.first.content).to eq("") + end + end + + describe "#read_file" do + it "raises an error if the file to be read does not exist" do + expect { dom_reader.read_file("blah") }.to raise_error(/file to be read does not exist/) + end + + it "returns the file's contents as an array" do + filename = './test.html' + contents = ["\n", "\n", "\n", "text
\n", "\n", "\n"] + + allow(File).to receive(:readlines).with(filename).and_return(contents) + + expect(dom_reader.read_file(filename)).to be_a(Array) + expect(dom_reader.read_file(filename)).to eq(contents) + end + end + + describe "#tokenize_file" do + it "returns the file's contents as an array of tokens" do + filename = './test.html' + contents = ["\n", "\n", "\n"] + + allow(File).to receive(:readlines).with(filename).and_return(contents) + + expected_result = ["", "\n", "", "\n", "", "\n"] + + expect(dom_reader.tokenize_file(filename)).to eq(expected_result) + end + end + + describe "#tokenize" do + it "breaks down the input string into tokens" do + test_string = "This document contains data\n" + tokens = dom_reader.tokenize(test_string) + + expect(tokens.length).to eq(1) + expect(tokens[0]).to eq("This document contains data\n") + end + + it "correctly tokenizes mixed content on the same line" do + test_string = "some text" + tokens = dom_reader.tokenize(test_string) + + expect(tokens.length).to eq(3) + expect(tokens[0]).to eq("") + expect(tokens[1]).to eq("some text") + expect(tokens[2]).to eq("") + end + end + + describe "#remove_doctype" do + it "removes any doctype declaration tokens from the provided list of tokens" do + tokens = ["", "\n", "", "\n", "", "\n"] + expected_result = ["\n", "", "\n", "", "\n"] + + expect(dom_reader.remove_doctype(tokens)).to eq(expected_result) + end + end + + describe "#token_type" do + it "returns :open_tag if the input is an opening tag" do + token = "" + expect(dom_reader.token_type(token)).to eq(:open_tag) + + token = "", 0, nil, []) } + + context "Node attributes" do + it "has a :type attribute" do + expect(node.type).to eq(:open_tag) + node.type = :close_tag + expect(node.type).to eq(:close_tag) + end + + it "has a :content attribute" do + expect(node.content).to eq("
") + node.content = "
" + expect(node.content).to eq("") + end + + it "has a :depth attribute" do + expect(node.depth).to eq(0) + node.depth = 1 + expect(node.depth).to eq(1) + end + + it "has a :parent attribute" do + expect(node.parent).to be nil + node.parent = Node.new(:text, "foobar", 0, nil, []) + expect(node.parent.content).to eq("foobar") + end + + it "has a :children attribute" do + expect(node.children).to be_a(Array) + expect(node.children.length).to eq(0) + node.children << "child" + expect(node.children[0]).to eq("child") + expect(node.children.length).to eq(1) + end + end + end +end diff --git a/spec/node_renderer_spec.rb b/spec/node_renderer_spec.rb new file mode 100644 index 0000000..ebf0d70 --- /dev/null +++ b/spec/node_renderer_spec.rb @@ -0,0 +1,46 @@ +# spec/node_renderer_spec.rb + +require 'dom_tree' +require 'node_renderer' + +describe "NodeRenderer" do + describe "#initialize" do + it "creates an instance of NodeRenderer" do + expect(NodeRenderer.new).to be_a(NodeRenderer) + end + end + + context "instance variables" do + describe "#tree" do + it "returns the value of the instance variable @tree" do + test_node_renderer = NodeRenderer.new("tree") + expect(test_node_renderer.tree).to eq("tree") + end + end + end + + describe "#render" do + node1 = Node.new(:open_tag, "", 1, node1, []) + node5 = Node.new(:text, "Some p text right here!", 1, node4, []) + node6 = Node.new(:close_tag, "
", 1, node4, []) + + node1.children << node2 + node1.children << node4 + node1.children << node3 + + node4.children << node5 + node4.children << node6 + + # setup tree and TreeSearcher instance + tree = DOMTree.new(node1) + tree_searcher = TreeSearcher.new(tree) + + describe "#search_by" do + context "searching for nodes by their tag name" do + it "returns the correct nodes when searching for an opening tag" do + results = tree_searcher.search_by(:name, 'div') + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node1) + end + + it "returns the correct nodes when searching for a closing tag" do + results = tree_searcher.search_by(:name, '/div') + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node3) + end + end + + context "searching for nodes by text" do + it "returns the correct nodes when searching by text" do + results = tree_searcher.search_by(:text, "text right here") + + expect(results).to be_a(Array) + expect(results.length).to eq(2) + expect(results).to include(node2) + expect(results).to include(node5) + + results = tree_searcher.search_by(:text, "P TEXT") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results).to include(node5) + end + end + + context "searching for nodes by id" do + it "returns the correct nodes when searching by id" do + results = tree_searcher.search_by(:id, "bar") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node1) + end + end + + context "searching for nodes by class" do + it "returns the correct nodes when searching by class" do + results = tree_searcher.search_by(:class, "foo") + + expect(results).to be_a(Array) + expect(results.length).to eq(2) + expect(results).to include(node1) + expect(results).to include(node4) + + results = tree_searcher.search_by(:class, "baz") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node1) + end + end + end + + describe "#search_descendents" do + it "returns the same results as #search_by, but only includes the descendents of the input node" do + root = tree_searcher.tree.document + results = tree_searcher.search_descendents(root, :class, "foo") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node4) + + results = tree_searcher.search_descendents(root, :text, "right here") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node5) + end + end + + describe "search_ancestors" do + it "returns the same results as #search_by, but only includes the ancestors of the input node" do + start_node = node6 + results = tree_searcher.search_ancestors(start_node, :class, "foo") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node1) + + results = tree_searcher.search_ancestors(start_node, :text, "right here") + + expect(results).to be_a(Array) + expect(results.length).to eq(1) + expect(results.first).to eq(node2) + end + end + end +end diff --git a/warmups/design_pseudocode.md b/warmups/design_pseudocode.md new file mode 100644 index 0000000..8982e00 --- /dev/null +++ b/warmups/design_pseudocode.md @@ -0,0 +1,51 @@ +### Pseudocoding the design + +``` +# DOMReader +Create an instance of DOMReader +Read in the provided HTML file +Parse it into individual tokens using regular expressions +Process each token to ensure that each edge case is handled: + - a tag with text before it, e.g. textp text