diff --git a/app/__init__.py b/app/__init__.py index bc074a5..ed03e35 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -9,11 +9,11 @@ app.config.from_pyfile('config.py') api = Api(app) -def searchIn(logFilename): +def saveSearch(logFilename): pathToFile = os.getcwd() + "/logs/" + logFilename if os.path.isfile(pathToFile): logFile = open(pathToFile, "r") - #apache.search(pathToFile) or some other way to start searching in apache + #apache.search(pathToFile) or some other way to start saveSearchg in apache result_id = 1 return result_id return -1 @@ -43,7 +43,7 @@ def post(self): args = search_parser.parse_args() log_filename = 'generated.log' - result_id = searchIn(log_filename) + result_id = saveSearch(log_filename) run_script_with_args(args['contains'], args['limit'], result_id) response = { @@ -62,6 +62,8 @@ def post(self): log_result_get = reqparse.RequestParser() log_result_get.add_argument('id', type=int) +a = 'ss' + @log_result_api.response(400, 'Parameters not provided') @log_result_api.route('/') class Result(Resource): @@ -75,7 +77,7 @@ def post(self): # for spark to send result search_id = args['id'] content = args['content'] # created = update_element_with_id(search_id, search_phrase, line, date, content) - + a = 'sa' response = { 'msg': 'Result added' #'created': created @@ -90,12 +92,13 @@ def get(self): # for front to gain result args = log_result_get.parse_args() if args['id']: - - #log_result = get_element_with_id(id) - return {'msg': 'log_result(result can be in database or not yet)'}, 200, {"Access-Control-Allow-Origin": "*"} + log_result = 'test';#get_element_with_id(id) + if log_result is None: + return {'msg': 'no results yet'}, 200, {"Access-Control-Allow-Origin": "*"} + else: + return [{'msg': log_result}], 200, {"Access-Control-Allow-Origin": "*"} return {'msg': 'Parameters not provided'}, 400, {"Access-Control-Allow-Origin": "*"} - if __name__ == '__main__': diff --git a/search.py b/search.py index 99e4ed8..2c42fa1 100644 --- a/search.py +++ b/search.py @@ -43,40 +43,23 @@ def parse_apache_log_line(logline): content_size = int(match.group(9)) ) -# .cache() - Persists the RDD in memory, which will be re-used again -access_logs = (sc.textFile(logFile) - .map(parse_apache_log_line) - .cache()) +# # .cache() - Persists the RDD in memory, which will be re-used again +# access_logs = (sc.textFile(logFile) +# .map(parse_apache_log_line) +# .cache()) -schema_access_logs = sqlContext.createDataFrame(access_logs) -#Creates a table on which SQL like queries can be fired for analysis -schema_access_logs.registerTempTable("logs") - -endpointsSearch = (sqlContext - .sql("SELECT * FROM logs WHERE endpoint=" + argv[1]) - .rdd.map(lambda row: (row[0], row[1])) - .collect()) - - -# def mappingFunc(s): -# words = s.split(" ") -# return len(words) - -# base_df = spark.read.text(raw_data_files) -# base_df.printSchema() - -# # view the type of data structure holding our log data using the following code: pyspark.sql.dataframe.DataFrame -# type(base_df) - -# base_df_rdd = base_df.rdd -# type(base_df_rdd) #pyspark.rdd.RDD - -# base_df_rdd.map(mappingFunc) +# schema_access_logs = sqlContext.createDataFrame(access_logs) +# #Creates a table on which SQL like queries can be fired for analysis +# schema_access_logs.registerTempTable("logs") +# endpointsSearch = (sqlContext +# .sql("SELECT * FROM logs WHERE endpoint=" + argv[1]) +# .rdd.map(lambda row: (row[0], row[1])) +# .collect()) url = 'http://localhost:5000/result/' -myobj = {'id': argv[0], +myobj = {'id': argv[3], 'content': 'endpointsSearch'}