-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextjson.py
More file actions
107 lines (94 loc) · 3.6 KB
/
extjson.py
File metadata and controls
107 lines (94 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import requests
import json
import warnings
import os
import urllib
import threading
from datetime import datetime
from queue import Queue
from queue import Empty
from time import sleep
import argparse
import base64
# --- CLI parsing and shared crawler state ---------------------------------
# Requests are made with verify=False, so silence the per-request warning.
warnings.filterwarnings('ignore', message='Unverified HTTPS request')

parser = argparse.ArgumentParser(
    description='Recursively list AEM content paths via the .ext.json servlet.')
parser.add_argument('url', type=str, help='url to AEM site (e.g. https://some-aem.com)')
parser.add_argument('--input', dest='f_input', type=str, default=None,
                    help='input file location with paths which will be added to queue before launch. Default will be "/"')
parser.add_argument('--threads', dest='t_number', type=int, default=5,
                    help='number of concurrent threads')
parser.add_argument('--credentials', dest='credentials', type=str, default=None,
                    help='user:password pair sent as HTTP Basic authentication')
parser.add_argument('--ftype', type=str, dest='ftype', default='nt:file',
                    help='type from json field "type" which will be considered, others will be ignored')
parser.add_argument('--fname', type=str, dest='fname', default='name',
                    help='attribute name from which data should be written to file')
parser.add_argument('--timeout', dest='timeout', type=int, default=8,
                    help='timeout for a request (in seconds)')
args = parser.parse_args()

# Paths already visited; shared (unsynchronized) across worker threads.
checked_list = []

if args.f_input is not None:
    with open(args.f_input) as f:
        input_list = f.readlines()
else:
    print("Input list not provided. Starting from '/', but weak results can be expected.")
    input_list = ['/']

# Seed the work queue; entries containing ':' (e.g. jcr:content nodes) are skipped.
qlist = Queue()
for path in input_list:
    if ':' not in path:
        qlist.put(path)

output = Queue()
output_file = f'listing-{datetime.now()}.txt'

if args.credentials is not None:
    credentials = base64.b64encode(args.credentials.encode()).decode("utf-8")
    headers = {'Authorization': 'Basic ' + credentials}
else:
    # Use an empty dict, not '': requests only tolerated the empty string
    # because it is falsy.
    headers = {}
def run():
    """Worker loop: pop paths from the shared queue, fetch each path's
    ``.ext.json`` listing, and enqueue every child whose ``type`` matches
    ``args.ftype``.

    Matching child paths are also pushed to the ``output`` queue for the
    writer thread. The worker exits once no work arrives within the
    request timeout (i.e. the crawl has drained).
    """
    while True:
        try:
            # get(timeout=...) replaces the original empty()+get() pair:
            # that check was racy across threads (another worker could take
            # the item in between, blocking this one forever), and the old
            # `except Empty` was dead code since a plain get() never raises.
            directory = qlist.get(timeout=args.timeout)
        except Empty:
            print("QUEUE EMPTY")
            return
        # Strip before deduplication; input-file lines carry a trailing
        # newline that previously defeated the checked_list lookup.
        directory = directory.strip()
        # Entries that look like files are reduced to their parent directory.
        if '.' in directory.split('/')[-1]:
            directory = os.path.dirname(os.path.abspath(directory))
        if directory in checked_list:
            continue
        checked_list.append(directory)
        url = f'{args.url}{directory}.ext.json'
        try:
            r = requests.get(url, verify=False, timeout=args.timeout,
                             allow_redirects=False, headers=headers)
        except requests.RequestException:
            # One timed-out/failed request must not kill the worker thread.
            continue
        try:
            listing = r.json()
        except json.decoder.JSONDecodeError:
            # Non-JSON response (error page, redirect body, ...) — skip.
            continue
        for entry in listing:
            if entry['type'] == args.ftype:
                found = directory + entry[args.fname]
                qlist.put(found)
                output.put(found + "\n")
def status():
    """Report the crawl queue size on a single self-updating console line.

    Sleeps for one request-timeout first so the workers have a chance to
    populate the queue, then refreshes every 2 seconds until it drains.
    """
    sleep(args.timeout)
    while True:
        if qlist.empty():
            break
        print(f"Queue size: {qlist.qsize()}", end='\r', flush=True)
        sleep(2)
def writer():
    """Drain the ``output`` queue into the listing file.

    Blocks up to 5 seconds per item; after a quiet period it stops only
    when both the work queue and the output queue are empty, otherwise it
    keeps waiting for the workers to produce more results.
    """
    with open(output_file, 'a+') as fo:
        while True:
            try:
                line = output.get(block=True, timeout=5)
            except Empty:
                # Catch only Empty: the original bare `except:` also
                # swallowed real write errors (and its trailing print was
                # unreachable dead code).
                if qlist.empty() and output.empty():
                    print("End of writing to file")
                    return
            else:
                fo.write(line)
# --- Launch the worker pool, the status reporter, and the file writer -----
threads = []
for _ in range(args.t_number):
    t = threading.Thread(target=run)
    threads.append(t)
    t.start()

timer = threading.Thread(target=status)
timer.start()

# Distinct name: the original `writer = threading.Thread(target=writer)`
# shadowed the writer() function with its own Thread object.
writer_thread = threading.Thread(target=writer)
writer_thread.start()

for t in threads:
    t.join()
# Wait for the writer to flush the remaining results before exiting.
writer_thread.join()