forked from microsoft/MSMARCO-Passage-Ranking
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsubsample.py
More file actions
21 lines (20 loc) · 699 Bytes
/
subsample.py
File metadata and controls
21 lines (20 loc) · 699 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import sys
def sub(filename, out, size):
    """Copy the lines belonging to the first ``size`` distinct ids to ``out``.

    The id of a line is the first tab-separated field of the line after
    surrounding whitespace has been stripped (the code calls it ``qid``).
    Ids are ordered by first appearance in ``filename``; every line whose id
    is among the first ``size`` distinct ids is written to ``out`` unchanged
    and in input order.

    Args:
        filename: Path of the tab-separated input file. It is read twice.
        out: Path of the output file; it is created or truncated.
        size: Number of distinct ids to keep. It is used as a slice bound,
            so a negative value drops that many ids from the end instead.
    """
    # First pass: collect distinct ids. A dict de-duplicates while keeping
    # first-appearance order, which the slice below relies on.
    qids = {}
    with open(filename, 'r') as f:
        for line in f:
            qids[line.strip().split('\t')[0]] = 0

    # A set gives O(1) membership tests; testing against a list would make
    # the second pass cost O(lines * size).
    sample = set(list(qids)[:size])

    # Second pass: stream the input again and keep only the sampled ids.
    with open(filename, 'r') as f, open(out, 'w') as w:
        for line in f:
            if line.strip().split('\t')[0] in sample:
                w.write(line)
# Command-line entry point: subsample.py <input> <output> <sample size>
if __name__ == "__main__":
    # Exactly three arguments are expected after the script name.
    if len(sys.argv) != 4:
        print("Usage: subsample.py <input filename> <output filename> <sample size>")
        # Use sys.exit: the bare exit() helper is injected by the site
        # module for interactive sessions and is not guaranteed to exist.
        sys.exit(-1)
    else:
        # The sample size arrives as text; int() raises ValueError if it is
        # not a valid integer literal.
        sub(sys.argv[1], sys.argv[2], int(sys.argv[3]))