-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPageEntry.java
More file actions
executable file
·144 lines (113 loc) · 2.97 KB
/
PageEntry.java
File metadata and controls
executable file
·144 lines (113 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Locale;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * A single web page read from the {@code webpages/} directory.
 *
 * <p>On construction the page is tokenised on whitespace and every retained
 * word is recorded twice: in a {@link PageIndex} (word -> positions) and in an
 * {@link AVLTree} keyed by word position (position -> word). The second
 * structure is what {@link #getRelevanceOfPage(String[])} uses to check that
 * the words of a phrase occur at consecutive positions.
 */
public class PageEntry {
    /**
     * Connector words. A token that equals one of these is not indexed, but it
     * still consumes one word position.
     */
    private static final String[] CONNECTORS = {
        "a", "an", "the", "they", "these", "this", "for", "is", "are", "was",
        "of", "or", "and", "does", "will", "whose"
    };

    /** Punctuation characters that {@link #filter(String)} turns into spaces; compiled once. */
    private static final Pattern PUNCTUATION =
            Pattern.compile("[\\-\\:\\^\\,\\.\\;\\'\\?\\!\\#\\<\\>\\[\\]\\=\\(\\)\\{\\}]");

    // Not used inside this class; kept because the field is package-visible.
    BufferedReader br = null;
    // Set to " " once the page file has been opened, or to null when it was not found.
    String pageData;
    // Word -> positions index of this page; stays null when the page file was not found.
    PageIndex pgIndex;
    // Name of the page file; only assigned once the file has been opened successfully.
    String pageName;
    // Position -> word lookup used for phrase matching; stays null when the file was not found.
    AVLTree<String> avl;

    /**
     * Reads {@code webpages/<pageName>} and indexes its words.
     *
     * <p>Word positions start at 1. Connector tokens are skipped but still
     * advance the position counter. Every other token is passed through
     * {@link #filter(String)} and split on single spaces; each resulting
     * fragment is indexed at its own position.
     *
     * <p>If the file does not exist, {@code "FileNotFound,"} is printed to
     * standard output, {@code pageData} is set to {@code null} and the
     * indexes are left uninitialised; no exception is thrown in that case.
     *
     * @param pageName file name of the page, relative to {@code webpages/}
     * @throws IOException if closing the page file fails
     * @throws FileNotFoundException declared for caller compatibility; a missing
     *         page file is handled internally as described above
     * @throws LinkedListOutofBoundsException declared for caller compatibility
     * @throws PositionNotFoundException declared for caller compatibility
     */
    public PageEntry(String pageName) throws LinkedListOutofBoundsException, IOException,FileNotFoundException, PositionNotFoundException {
        // try-with-resources: the stream and scanner are released even when
        // indexing fails part-way through.
        try (FileInputStream file = new FileInputStream("webpages/" + pageName);
             Scanner s = new Scanner(file)) {
            this.pageName = pageName;
            pageData = " ";
            pgIndex = new PageIndex();
            avl = new AVLTree<String>();

            int position = 1;
            while (s.hasNext()) {
                String token = s.next();
                // NOTE(review): the connector test runs on the raw token, so
                // capitalised or punctuated forms such as "The" or "the," are
                // indexed like ordinary words. Kept as-is; confirm it is intended.
                if (isConnector(token)) {
                    position++;
                    continue;
                }
                // NOTE(review): split(" ") yields empty fragments for tokens made
                // only of punctuation or containing adjacent punctuation; those
                // empty words are indexed and consume a position, as before.
                String[] words = filter(token).split(" ");
                for (String word : words) {
                    pgIndex.addPositionForWord(word, new Position(this, position));
                    avl.Insert(word, position);
                    position++;
                }
            }
        } catch (FileNotFoundException e) {
            System.out.print("FileNotFound,");
            pageData = null;
        }
    }

    /**
     * Returns the word index built for this page.
     *
     * @return the page index, or {@code null} if the page file was not found
     */
    public PageIndex getPageIndex() {
        return this.pgIndex;
    }

    /**
     * Normalises a raw token: punctuation and double quotes become spaces, the
     * result is trimmed and lower-cased, and the three plurals "stacks",
     * "structures" and "applications" lose their trailing "s".
     *
     * <p>The result may still contain interior spaces (for example
     * {@code "a-b"} becomes {@code "a b"}).
     *
     * @param str raw token; must not be {@code null}
     * @return the normalised token, possibly empty
     */
    public String filter(String str) {
        String cleaned = PUNCTUATION.matcher(str).replaceAll(" ");
        cleaned = cleaned.replace('"', ' ').trim();
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        cleaned = cleaned.toLowerCase(Locale.ROOT);
        if (cleaned.equals("stacks") || cleaned.equals("structures") || cleaned.equals("applications")) {
            cleaned = cleaned.substring(0, cleaned.length() - 1);
        }
        return cleaned;
    }

    /**
     * Scores this page for a phrase.
     *
     * <p>{@code str[0]} is ignored by this method; {@code str[1]} is the first
     * word of the phrase and {@code str[2..]} are the words that must follow
     * it at consecutive positions. Each occurrence of the whole phrase that
     * starts at position {@code p} contributes {@code 1 / p^2} to the result.
     *
     * @param str query array as described above
     * @return the summed score, or {@code 0} when the phrase does not occur,
     *         when {@code str} is {@code null} or has fewer than two elements,
     *         or when this page could not be loaded
     */
    public double getRelevanceOfPage(String[] str) {
        if (str == null || str.length < 2 || pgIndex == null || avl == null) {
            return 0;
        }
        String firstWord = str[1];

        // Falls back to a fresh entry when the word is not in the index, as the
        // original code did. The last matching entry wins if there are several.
        WordEntry entry = new WordEntry(firstWord);
        // Traversal stops at the first node without data — presumably a tail
        // sentinel of the project's linked list (TODO confirm) — or at a null link.
        Node<WordEntry> wordNode = this.pgIndex.getWordEntries().head;
        while (wordNode != null && wordNode.getData() != null) {
            if (wordNode.getData().getWord().equals(firstWord)) {
                entry = wordNode.getData();
            }
            wordNode = wordNode.next;
        }

        double relevance = 0;
        Node<Position> posNode = entry.getAllPositionsForThisWord().head;
        while (posNode != null && posNode.getData() != null) {
            Position pos = posNode.getData();
            if (pos.getPageEntry().equals(this)) {
                int key = pos.getWordIndex();
                if (phraseContinuesAt(str, key)) {
                    double p = (double) key;
                    relevance += 1.0 / (p * p);
                }
            }
            posNode = posNode.next;
        }
        return relevance;
    }

    /** Tells whether {@code token} is exactly one of the connector words. */
    private static boolean isConnector(String token) {
        for (String connector : CONNECTORS) {
            if (token.equals(connector)) {
                return true;
            }
        }
        return false;
    }

    /** Checks that {@code str[2..]} occupy the positions right after {@code key}. */
    private boolean phraseContinuesAt(String[] str, int key) {
        for (int i = 2; i < str.length; i++) {
            AVLNode<String> node = this.avl.findNodeWithKey(key + i - 1, avl.root);
            // Connector positions are never inserted and the phrase may run past
            // the last word, so a lookup can come back empty; treat a missing
            // node as a mismatch instead of dereferencing it.
            if (node == null || node.getData() == null || !node.getData().equals(str[i])) {
                return false;
            }
        }
        return true;
    }
}