Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
app.config.from_pyfile('config.py')
api = Api(app)

def saveSearch(logFilename):
    """Check that a log file exists under ./logs and return a search id.

    The path is built from the current working directory, the "logs"
    sub-directory and ``logFilename``.

    Args:
        logFilename: Name of the log file, relative to ``<cwd>/logs/``.

    Returns:
        ``1`` (a hard-coded placeholder result id) when the path is an
        existing regular file, ``-1`` otherwise.
    """
    pathToFile = os.getcwd() + "/logs/" + logFilename
    if not os.path.isfile(pathToFile):
        return -1

    # The file is still opened (so an unreadable file fails here, as before),
    # but inside a context manager so the handle is closed instead of leaked.
    with open(pathToFile, "r"):
        # TODO: apache.search(pathToFile) or some other way to start
        # searching in apache
        result_id = 1
    return result_id


# Both the old name (searchIn) and the new name (saveSearch) are still
# referenced by call sites, so keep the old name as an alias.
searchIn = saveSearch
Expand Down Expand Up @@ -43,7 +43,7 @@ def post(self):
args = search_parser.parse_args()

log_filename = 'generated.log'
result_id = searchIn(log_filename)
result_id = saveSearch(log_filename)
run_script_with_args(args['contains'], args['limit'], result_id)

response = {
Expand All @@ -62,6 +62,8 @@ def post(self):
log_result_get = reqparse.RequestParser()
log_result_get.add_argument('id', type=int)

a = 'ss'

@log_result_api.response(400, 'Parameters not provided')
@log_result_api.route('/')
class Result(Resource):
Expand All @@ -75,7 +77,7 @@ def post(self): # for spark to send result
search_id = args['id']
content = args['content']
# created = update_element_with_id(search_id, search_phrase, line, date, content)

a = 'sa'
response = {
'msg': 'Result added'
#'created': created
Expand All @@ -90,12 +92,13 @@ def get(self): # for front to gain result
args = log_result_get.parse_args()

if args['id']:

#log_result = get_element_with_id(id)
return {'msg': 'log_result(result can be in database or not yet)'}, 200, {"Access-Control-Allow-Origin": "*"}
log_result = 'test';#get_element_with_id(id)
if log_result is None:
return {'msg': 'no results yet'}, 200, {"Access-Control-Allow-Origin": "*"}
else:
return [{'msg': log_result}], 200, {"Access-Control-Allow-Origin": "*"}
return {'msg': 'Parameters not provided'}, 400, {"Access-Control-Allow-Origin": "*"}




if __name__ == '__main__':
Expand Down
41 changes: 12 additions & 29 deletions search.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,40 +43,23 @@ def parse_apache_log_line(logline):
content_size = int(match.group(9))
)

# .cache() - Persists the RDD in memory, which will be re-used again
access_logs = (sc.textFile(logFile)
.map(parse_apache_log_line)
.cache())
# # .cache() - Persists the RDD in memory, which will be re-used again
# access_logs = (sc.textFile(logFile)
# .map(parse_apache_log_line)
# .cache())

schema_access_logs = sqlContext.createDataFrame(access_logs)
#Creates a table on which SQL like queries can be fired for analysis
schema_access_logs.registerTempTable("logs")

endpointsSearch = (sqlContext
.sql("SELECT * FROM logs WHERE endpoint=" + argv[1])
.rdd.map(lambda row: (row[0], row[1]))
.collect())


# def mappingFunc(s):
# words = s.split(" ")
# return len(words)

# base_df = spark.read.text(raw_data_files)
# base_df.printSchema()

# # view the type of data structure holding our log data using the following code: pyspark.sql.dataframe.DataFrame
# type(base_df)

# base_df_rdd = base_df.rdd
# type(base_df_rdd) #pyspark.rdd.RDD

# base_df_rdd.map(mappingFunc)
# schema_access_logs = sqlContext.createDataFrame(access_logs)
# #Creates a table on which SQL like queries can be fired for analysis
# schema_access_logs.registerTempTable("logs")

# endpointsSearch = (sqlContext
# .sql("SELECT * FROM logs WHERE endpoint=" + argv[1])
# .rdd.map(lambda row: (row[0], row[1]))
# .collect())

url = 'http://localhost:5000/result/'

myobj = {'id': argv[0],
myobj = {'id': argv[3],
'content': 'endpointsSearch'}


Expand Down