diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ae7e697 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.swp +cachedir/* diff --git a/BatchProviderROI.lua b/BatchProviderROI.lua new file mode 100644 index 0000000..38fa310 --- /dev/null +++ b/BatchProviderROI.lua @@ -0,0 +1,158 @@ +local BatchProviderROI, parent = torch.class('nnf.BatchProviderROI','nnf.BatchProvider') + +function BatchProviderROI:__init(dataset) + local fp = {dataset=dataset} + parent.__init(self, fp) + self.imgs_per_batch = 2 + self.scale = 600 + self.max_size = 1000 + self.image_transformer = nnf.ImageTransformer{} +end + +-- setup is the same + +function BatchProviderROI:permuteIdx() + --local fg_num_total = self.fg_num_total + --local bg_num_total = self.bg_num_total + local total_img = self.dataset:size() + local imgs_per_batch = self.imgs_per_batch + + self._cur = self._cur or math.huge + + if self._cur + imgs_per_batch > total_img then + self._perm = torch.randperm(total_img) + self._cur = 1 + end + + local img_idx = self._perm[{{self._cur,self._cur + self.imgs_per_batch - 1}}] + self._cur = self._cur + self.imgs_per_batch + + local img_idx_end = imgs_per_batch + + local fg_windows = {} + local bg_windows = {} + for i=1,img_idx_end do + local curr_idx = img_idx[i] + bg_windows[i] = {} + if self.bboxes[curr_idx][0] then + for j=1,self.bboxes[curr_idx][0]:size(1) do + table.insert(bg_windows[i],{curr_idx,j}) + end + end + fg_windows[i] = {} + if self.bboxes[curr_idx][1] then + for j=1,self.bboxes[curr_idx][1]:size(1) do + table.insert(fg_windows[i],{curr_idx,j}) + end + end + end + local do_flip = torch.FloatTensor(imgs_per_batch):random(0,1) + local opts = {img_idx=img_idx,img_idx_end=img_idx_end,do_flip=do_flip} + return fg_windows,bg_windows,opts + +end + +function BatchProviderROI:selectBBoxes(fg_windows,bg_windows,im_scales,do_flip,im_sizes) + local fg_num_each = torch.round(self.fg_num_each/self.imgs_per_batch) + local bg_num_each = torch.round(self.bg_num_each/self.imgs_per_batch) + + local rois = {} + local labels = {} + for im=1,self.imgs_per_batch do + local im_scale = im_scales[im] + local window_idx = torch.randperm(#bg_windows[im]) + local end_idx = math.min(bg_num_each,#bg_windows[im]) + local flip = do_flip[im] == 1 + local im_size = im_sizes[im] + for i=1,end_idx do + local curr_idx = bg_windows[im][window_idx[i] ][1] + local position = bg_windows[im][window_idx[i] ][2] + local dd = self.bboxes[curr_idx][0][position][{{2,5}}]:clone() + dd:add(-1):mul(im_scale):add(1) + if flip then + local tt = dd[1] + dd[1] = im_size[2]-dd[3] +1 + dd[3] = im_size[2]-tt +1 + end + table.insert(rois,{im,dd[1],dd[2],dd[3],dd[4]}) + table.insert(labels,self.bboxes[curr_idx][0][position][6]) + end + + window_idx = torch.randperm(#fg_windows[im]) + local end_idx = math.min(fg_num_each,#fg_windows[im]) + for i=1,end_idx do + local curr_idx = fg_windows[im][window_idx[i] ][1] + local position = fg_windows[im][window_idx[i] ][2] + local dd = self.bboxes[curr_idx][1][position][{{2,5}}]:clone() + dd:add(-1):mul(im_scale):add(1) + if flip then + local tt = dd[1] + dd[1] = im_size[2]-dd[3] +1 + dd[3] = im_size[2]-tt +1 + end + table.insert(rois,{im,dd[1],dd[2],dd[3],dd[4]}) + table.insert(labels,self.bboxes[curr_idx][1][position][6]) + end + end + rois = torch.FloatTensor(rois) + labels = torch.IntTensor(labels) + return rois, labels +end + +local function getImages(self,img_ids,images,do_flip) + local dataset = self.dataset + local num_images = img_ids:size(1) + + local imgs = {} + local im_sizes = {} + local im_scales = {} + + for i=1,num_images do + local im = dataset:getImage(img_ids[i]) + im = self.image_transformer:preprocess(im) + local flip = do_flip[i] == 1 + if flip then + im = image.hflip(im) + end + local im_size = im[1]:size() + local im_size_min = math.min(im_size[1],im_size[2]) + local im_size_max = math.max(im_size[1],im_size[2]) + local im_scale = self.scale/im_size_min + if torch.round(im_scale*im_size_max) > self.max_size then + im_scale = self.max_size/im_size_max + end + local im_s = {torch.round(im_size[1]*im_scale),torch.round(im_size[2]*im_scale)} + table.insert(imgs,image.scale(im,im_s[2],im_s[1])) + table.insert(im_sizes,im_s) + table.insert(im_scales,im_scale) + end + -- create single tensor with all images, padding with zero for different sizes + im_sizes = torch.IntTensor(im_sizes) + local max_shape = im_sizes:max(1)[1] + images:resize(num_images,3,max_shape[1],max_shape[2]):zero() + for i=1,num_images do + images[i][{{},{1,imgs[i]:size(2)},{1,imgs[i]:size(3)}}]:copy(imgs[i]) + end + return im_scales,im_sizes +end + + +function BatchProviderROI:getBatch(batches,targets) + local dataset = self.dataset + + self.fg_num_each = self.fg_fraction * self.batch_size + self.bg_num_each = self.batch_size - self.fg_num_each + + local fg_windows,bg_windows,opts = self:permuteIdx() + --local fg_w,bg_w = self:selectBBoxes(fg_windows,bg_windows) + + local batches = batches or {torch.FloatTensor(),torch.FloatTensor()} + local targets = targets or torch.FloatTensor() + + local im_scales, im_sizes = getImages(self,opts.img_idx,batches[1],opts.do_flip) + local rois,labels = self:selectBBoxes(fg_windows,bg_windows,im_scales,opts.do_flip, im_sizes) + batches[2]:resizeAs(rois):copy(rois) + targets:resize(labels:size()):copy(labels) + + return batches, targets +end diff --git a/FRCNN.lua b/FRCNN.lua new file mode 100644 index 0000000..6fd8061 --- /dev/null +++ b/FRCNN.lua @@ -0,0 +1,42 @@ +local FRCNN = torch.class('nnf.FRCNN') + +function FRCNN:__init(dataset) + self.dataset = dataset + + self.scale = {600} + self.max_dim = 1000 + self.randomscale = true + + --self.sz_conv_standard = 13 + self.step_standard = 16 + --self.offset0 = 21 + --self.offset = 6.5 + + --self.inputArea = 224^2 + +end + +function FRCNN:getScale(I) + local min_size = math.min(I[2],I[3]) + local max_size = math.max(I[2],I[3]) + local scale + if max_size <= self.max_dim then + scale = self.scale[1]/min_size + else + scale = self.max_dim/max_size + end + return scale +end + +function FRCNN:projectBBoxes(bboxes,scale) + return (bboxes-1)*scale+1 +end + +function FRCNN:getFeatures(i,flip) + local I = self.dataset:getImage(i) + local bboxes = self.dataset:attachProposals(i) + I = prepareImage(I) + if flip then + + end +end diff --git a/ImageDetect.lua b/ImageDetect.lua new file mode 100644 index 0000000..27bf8e2 --- /dev/null +++ b/ImageDetect.lua @@ -0,0 +1,88 @@ +local ImageDetect = torch.class('nnf.ImageDetect') + +function ImageDetect:__init(model) + self.model = model + self.image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717}, + raw_scale = 255, + swap = {3,2,1}} + self.scale = {600} + self.max_size = 1000 + self.sm = nn.SoftMax():cuda() +end + + +local function getImages(self,images,im) + local num_scales = #self.scale + + local imgs = {} + local im_sizes = {} + local im_scales = {} + + im = self.image_transformer:preprocess(im) + + local im_size = im[1]:size() + local im_size_min = math.min(im_size[1],im_size[2]) + local im_size_max = math.max(im_size[1],im_size[2]) + for i=1,num_scales do + local im_scale = self.scale[i]/im_size_min + if torch.round(im_scale*im_size_max) > self.max_size then + im_scale = self.max_size/im_size_max + end + local im_s = {im_size[1]*im_scale,im_size[2]*im_scale} + table.insert(imgs,image.scale(im,im_s[2],im_s[1])) + table.insert(im_sizes,im_s) + table.insert(im_scales,im_scale) + end + -- create single tensor with all images, padding with zero for different sizes + im_sizes = torch.IntTensor(im_sizes) + local max_shape = im_sizes:max(1)[1] + images:resize(num_scales,3,max_shape[1],max_shape[2]):zero() + for i=1,num_scales do + images[i][{{},{1,imgs[i]:size(2)},{1,imgs[i]:size(3)}}]:copy(imgs[i]) + end + return im_scales +end + +local function project_im_rois(im_rois,scales) + local levels + local rois = torch.FloatTensor() + if #scales > 1 then + local scales = torch.FloatTensor(scales) + local widths = im_rois[{{},3}] - im_rois[{{},1}] + 1 + local heights = im_rois[{{},4}] - im_rois[{{}, 2}] + 1 + + local areas = widths * heights + local scaled_areas = areas:view(-1,1) * torch.pow(scales:view(1,-1),2) + local diff_areas = torch.abs(scaled_areas - 224 * 224) + levels = select(2, diff_areas:min(2)) + else + levels = torch.FloatTensor() + rois:resize(im_rois:size(1),5) + rois[{{},1}]:fill(1) + rois[{{},{2,5}}]:copy(im_rois):add(-1):mul(scales[1]):add(1) + end + + return rois + +end + +-- supposes boxes is in [x1,y1,x2,y2] format +function ImageDetect:detect(im,boxes) + local inputs = {torch.FloatTensor(),torch.FloatTensor()} + local im_scales = getImages(self,inputs[1],im) + inputs[2] = project_im_rois(boxes,im_scales) + + local inputs_cuda = {torch.CudaTensor(),torch.CudaTensor()} + inputs_cuda[1]:resize(inputs[1]:size()):copy(inputs[1]) + inputs_cuda[2]:resize(inputs[2]:size()):copy(inputs[2]) + local output0 = self.model:forward(inputs_cuda) + local output = self.sm:forward(output0):float() + --[[ + for i=1,#im_scales do + local dd = boxes:clone() + dd:add(-1):mul(im_scale[i]):add(1) + + end + --]] + return output +end diff --git a/ROIPooling.lua b/ROIPooling.lua new file mode 100644 index 0000000..af12402 --- /dev/null +++ b/ROIPooling.lua @@ -0,0 +1,86 @@ +local ROIPooling,parent = torch.class('nnf.ROIPooling','nn.Module') + +function ROIPooling:__init(W,H) + parent.__init(self) + self.W = W + self.H = H + self.pooler = {}--nn.SpatialAdaptiveMaxPooling(W,H) + self.spatial_scale = 1 + self.gradInput = {torch.Tensor()} +end + +function ROIPooling:setSpatialScale(scale) + self.spatial_scale = scale + return self +end + +function ROIPooling:updateOutput(input) + local data = input[1] + local rois = input[2] + + local num_rois = rois:size(1) + local s = data:size() + local ss = s:size(1) + self.output:resize(num_rois,s[ss-2],self.H,self.W) + + rois[{{},{2,5}}]:add(-1):mul(self.spatial_scale):add(1):round() + rois[{{},2}]:cmin(s[ss]) + rois[{{},3}]:cmin(s[ss-1]) + rois[{{},4}]:cmin(s[ss]) + rois[{{},5}]:cmin(s[ss-1]) + + -- element access is faster if not a cuda tensor + if rois:type() == 'torch.CudaTensor' then + self._rois = self._rois or torch.FloatTensor() + self._rois:resize(rois:size()):copy(rois) + rois = self._rois + end + + if not self._type then self._type = output:type() end + + if #self.pooler < num_rois then + local diff = num_rois - #self.pooler + for i=1,diff do + table.insert(self.pooler,nn.SpatialAdaptiveMaxPooling(self.W,self.H):type(self._type)) + end + end + + for i=1,num_rois do + local roi = rois[i] + local im_idx = roi[1] + local im = data[{im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}] + self.output[i] = self.pooler[i]:updateOutput(im) + end + return self.output +end + +function ROIPooling:updateGradInput(input,gradOutput) + local data = input[1] + local rois = input[2] + if rois:type() == 'torch.CudaTensor' then + rois = self._rois + end + local num_rois = rois:size(1) + local s = data:size() + local ss = s:size(1) + self.gradInput[1]:resizeAs(data):zero() + + for i=1,num_rois do + local roi = rois[i] + local im_idx = roi[1] + local r = {im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}} + local im = data[r] + local g = self.pooler[i]:updateGradInput(im,gradOutput[i]) + self.gradInput[1][r]:add(g) + end + return self.gradInput +end + +function ROIPooling:type(type) + parent.type(self,type) + for i=1,#self.pooler do + self.pooler[i]:type(type) + end + self._type = type + return self +end diff --git a/Tester_FRCNN.lua b/Tester_FRCNN.lua new file mode 100644 index 0000000..3a541e0 --- /dev/null +++ b/Tester_FRCNN.lua @@ -0,0 +1,193 @@ +local utils = paths.dofile('utils.lua') +local nms = paths.dofile('nms.lua') + +local keep_top_k = utils.keep_top_k +local VOCevaldet = utils.VOCevaldet + +local Tester = torch.class('nnf.Tester_FRCNN') + +function Tester:__init(module,feat_provider) + self.dataset = feat_provider.dataset + self.module = module + self.feat_provider = feat_provider + + self.feat_dim = {256*50} + self.max_batch_size = 4000 + + self.cachefolder = nil + self.cachename = nil + self.suffix = '' + self.verbose = true +end + +-- improve it ! +function Tester:validate(criterion) + + local tname = paths.concat(self.cachefolder,self.cachename) + local valData + if paths.filep(tname) then + valData = torch.load(tname) + else + -- batch_provider need to be set before + valData = {} + valData.inputs,valData.targets = self.batch_provider:getBatch() + torch.save(tname,valData) + self.batch_provider = nil + end + + local num_batches = valData.inputs:size(1) + local module = self.module + + local err = 0 + local inputs = torch.CudaTensor() + local targets = torch.CudaTensor() + for t=1,num_batches do + xlua.progress(t,num_batches) + + inputs:resize(valData.inputs[t]:size()):copy(valData.inputs[t]) + targets:resize(valData.targets[t]:size()):copy(valData.targets[t]) + + local output = module:forward(inputs) + + err = err + criterion:forward(output,targets) + end + + valData = nil + collectgarbage() + + return err/num_batches +end + +function Tester:test(iteration) + + local dataset = self.dataset + local module = self.module + local feat_provider = self.feat_provider + + local pathfolder = paths.concat(self.cachefolder,'test_iter'..iteration) + paths.mkdir(pathfolder) + + module:evaluate() + dataset:loadROIDB() + + local feats = torch.FloatTensor() + local feats_batched = {} + local feats_cuda = torch.CudaTensor() + + local output = torch.FloatTensor() + + local output_dim = module:get(module:size()) + + local softmax = nn.SoftMax():float() + + local boxes + -- + local aboxes = {} + for i=1,dataset.num_classes do + table.insert(aboxes,{}) + end + + local max_per_set = 5*dataset:size() + local max_per_image = 100 + local thresh = torch.ones(dataset.num_classes):mul(-1.5) + local scored_boxes = torch.FloatTensor() + + local timer = torch.Timer() + local timer2 = torch.Timer() + local timer3 = torch.Timer() + local detec = nnf.ImageDetect(module) + for i=1,dataset:size() do + timer:reset() + io.write(('test: (%s) %5d/%-5d '):format(dataset.dataset_name,i,dataset:size())); + boxes = dataset:getROIBoxes(i):float() + local im = dataset:getImage(i) + timer3:reset() + local output = detec:detect(im,boxes) + + local add_bg = 0 + if dataset.num_classes ~= output:size(2) then -- if there is no svm + --output = softmax:forward(output) + add_bg = 1 + end + local tt = 0 + local tt2 = timer3:time().real + + timer2:reset() + for j=1,dataset.num_classes do + local scores = output:select(2,j+add_bg) + local idx = torch.range(1,scores:numel()):long() + local idx2 = scores:gt(thresh[j]) + idx = idx[idx2] + scored_boxes:resize(idx:numel(),5) + if scored_boxes:numel() > 0 then + scored_boxes:narrow(2,1,4):index(boxes,1,idx) + scored_boxes:select(2,5):copy(scores[idx2]) + end + local keep = nms(scored_boxes,0.3) + if keep:numel()>0 then + local _,ord = torch.sort(scored_boxes:select(2,5):index(1,keep),true) + ord = ord:narrow(1,1,math.min(ord:numel(),max_per_image)) + keep = keep:index(1,ord) + aboxes[j][i] = scored_boxes:index(1,keep) + else + aboxes[j][i] = torch.FloatTensor() + end + + if i%1000 == 0 then + aboxes[j],thresh[j] = keep_top_k(aboxes[j],max_per_set) + end + + end + + io.write((' prepare feat time: %.3f, forward time: %.3f, select time: %.3fs, total time: %.3fs\n'):format(tt,tt2,timer2:time().real,timer:time().real)); + --collectgarbage() + --mattorch.save(paths.concat(pathfolder,dataset.img_ids[i]..'.mat'),output:double()) + end + + for i = 1,dataset.num_classes do + -- go back through and prune out detections below the found threshold + for j = 1,dataset:size() do + if aboxes[i][j]:numel() > 0 then + local I = aboxes[i][j]:select(2,5):lt(thresh[i]) + local idx = torch.range(1,aboxes[i][j]:size(1)):long() + idx = idx[I] + if idx:numel()>0 then + aboxes[i][j] = aboxes[i][j]:index(1,idx) + end + end + end + save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'.. + dataset.dataset_name..self.suffix) + torch.save(save_file, aboxes) + end + + local res = {} + for i=1,dataset.num_classes do + local cls = dataset.classes[i] + res[i] = VOCevaldet(dataset,aboxes[i],cls) + end + res = torch.Tensor(res) + print('Results:') + -- print class names + io.write('|') + for i = 1, dataset.num_classes do + io.write(('%5s|'):format(dataset.classes[i])) + end + io.write('\n|') + -- print class scores + for i = 1, dataset.num_classes do + local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i] + local l = res[i] == res[i] and l-5 or l-3 + if l > 0 then + io.write(('%.3f%'..l..'s|'):format(res[i],' ')) + else + io.write(('%.3f|'):format(res[i])) + end + end + io.write('\n') + io.write(('mAP: %.4f\n'):format(res:mean(1)[1])) + + -- clean roidb to free memory + dataset.roidb = nil + return res +end diff --git a/data.lua b/data.lua index 655deb5..f1cb9d5 100644 --- a/data.lua +++ b/data.lua @@ -51,6 +51,7 @@ else batch_provider = nnf.BatchProvider(feat_provider) batch_provider.iter_per_batch = opt.ipb batch_provider.nTimesMoreData = opt.ntmd + batch_provider.batch_size = opt.batch_size batch_provider.fg_fraction = opt.fg_frac batch_provider.bg_threshold = {0.0,0.5} batch_provider.do_flip = true @@ -89,6 +90,7 @@ else batch_provider_test = nnf.BatchProvider(feat_provider_test) batch_provider_test.iter_per_batch = 500--opt.ipb batch_provider_test.nTimesMoreData = 10--opt.ntmd + batch_provider_test.batch_size = opt.batch_size batch_provider_test.fg_fraction = opt.fg_frac batch_provider_test.bg_threshold = {0.0,0.5} batch_provider_test.do_flip = false diff --git a/model.lua b/model.lua index 9700f0b..36812e1 100644 --- a/model.lua +++ b/model.lua @@ -21,6 +21,17 @@ if opt.algo == 'RCNN' then elseif opt.algo == 'SPP' then features = model:get(1) classifier = model:get(3) +elseif opt.algo == 'FRCNN' then + local temp = nn.Sequential() + local features = model:get(1) + local classifier = model:get(3) + local prl = nn.ParallelTable() + prl:add(features) + prl:add(nn.Identity()) + temp:add(prl) + temp:add(nnf.ROIPooling(7,7)) + temp:add(nn.View(-1):setNumInputDims(3)) + temp:add(classifier) end -- 2. Create Criterion diff --git a/nms.lua b/nms.lua index ab12f63..2663b76 100644 --- a/nms.lua +++ b/nms.lua @@ -39,14 +39,22 @@ local function nms(boxes, overlap) I = I[{{1,last-1}}] xx1:index(x1,1,I) - xx1:clamp(0,x1[i]) yy1:index(y1,1,I) - yy1:clamp(0,y1[i]) xx2:index(x2,1,I) - xx2:clamp(x2[i],math.huge) yy2:index(y2,1,I) - yy2:clamp(y2[i],math.huge) - + + -- this code is wrong + -- xx1:clamp(0,x1[i]) + -- yy1:clamp(0,y1[i]) + -- xx2:clamp(x2[i],math.huge) + -- yy2:clamp(y2[i],math.huge) + + -- this code is right + xx1:clamp(x1[i],math.huge) + yy1:clamp(y1[i],math.huge) + xx2:clamp(0,x2[i]) + yy2:clamp(0,y2[i]) + w:resizeAs(xx2):zero() w:map2(xx2,xx1,function(xx,xxx2,xxx1) return math.max(xxx2-xxx1+1,0) end) h:resizeAs(yy2):zero() diff --git a/nnf.lua b/nnf.lua index a2e7831..f4610da 100644 --- a/nnf.lua +++ b/nnf.lua @@ -1,20 +1,24 @@ require 'nn' require 'image' -require 'inn' +--require 'inn' require 'xlua' nnf = {} torch.include('nnf','DataSetPascal.lua') torch.include('nnf','BatchProvider.lua') +torch.include('nnf','BatchProviderROI.lua') -torch.include('nnf','SPP.lua') +--torch.include('nnf','SPP.lua') torch.include('nnf','RCNN.lua') +torch.include('nnf','ROIPooling.lua') torch.include('nnf','Trainer.lua') torch.include('nnf','Tester.lua') +torch.include('nnf','Tester_FRCNN.lua') torch.include('nnf','SVMTrainer.lua') torch.include('nnf','ImageTransformer.lua') +torch.include('nnf','ImageDetect.lua') --return nnf diff --git a/opts.lua b/opts.lua index f07d8dc..3665874 100644 --- a/opts.lua +++ b/opts.lua @@ -36,6 +36,7 @@ function M.parse(arg) cmd:option('-nsmooth',40,'number of iterations before reducing learning rate') cmd:option('-nred',4,'number of divisions by 2 before stopping learning') cmd:option('-nildfdx',false,'erase memory of gradients when reducing learning rate') + cmd:option('-batch_size',128,'batch size') cmd:text() cmd:text('Others') cmd:option('-gpu',1,'gpu device to use') diff --git a/test.lua b/test.lua new file mode 100644 index 0000000..6057a07 --- /dev/null +++ b/test.lua @@ -0,0 +1,13 @@ +require 'nn' +nnf = {} +dofile 'ROIPooling.lua' + +m = nnf.ROIPooling(3,3) + +t = {torch.rand(1,10,10),torch.Tensor({{1,1,5,5},{2,3,7,8},{6,4,8,8},{6,4,10,10},{8,8,10,10}})} -- +g = torch.rand(t[2]:size(1),1,3,3) + +o = m:forward(t) +gg = m:backward(t,g) + + diff --git a/test_frcnn.lua b/test_frcnn.lua new file mode 100644 index 0000000..24bb23b --- /dev/null +++ b/test_frcnn.lua @@ -0,0 +1,282 @@ +require 'nnf' +require 'inn' +require 'cudnn' +require 'gnuplot' + +cutorch.setDevice(2) + +dt = torch.load('pascal_2007_train.t7') +if false then + ds = nnf.DataSetPascal{image_set='train', + datadir='/home/francisco/work/datasets/VOCdevkit', + roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data' + } +else + ds = nnf.DataSetPascal{image_set='trainval', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data' + } +end + +if false then + ds.roidb = {} + for i=1,ds:size() do + ds.roidb[i] = torch.IntTensor(10,4):random(1,5) + ds.roidb[i][{{},{3,4}}]:add(6) + end +elseif false then + ds.roidb = dt.roidb +end + +local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},--{103.939, 116.779, 123.68}, + raw_scale = 255, + swap = {3,2,1}} +if true then + bp = nnf.BatchProviderROI(ds) + bp.image_transformer = image_transformer + bp.bg_threshold = {0.1,0.5} + bp:setupData() +else + bp = nnf.BatchProviderROI(ds) + bp.image_transformer = image_transformer + local temp = torch.load('pascal_2007_train_bp.t7') + bp.bboxes = temp.bboxes +end + + +if false then + local mytest = nnf.ROIPooling(50,50):float() + function do_mytest() + local input0,target0 = bp:getBatch(input0,target0) + local o = mytest:forward(input0) + return input0,target0,o + end + --input0,target0,o = do_mytest() +end + +--------------------------------------------------------------------------------------- +-- model +--------------------------------------------------------------------------------------- +do + + model = nn.Sequential() + local features = nn.Sequential() + local classifier = nn.Sequential() + + if false then + features:add(nn.SpatialConvolutionMM(3,96,11,11,4,4,5,5)) + features:add(nn.ReLU(true)) + features:add(nn.SpatialConvolutionMM(96,128,5,5,2,2,2,2)) + features:add(nn.ReLU(true)) + features:add(nn.SpatialMaxPooling(2,2,2,2)) + + classifier:add(nn.Linear(128*7*7,1024)) + classifier:add(nn.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + classifier:add(nn.Linear(1024,21)) + + elseif false then + require 'loadcaffe' +-- local rcnnfold = '/home/francisco/work/libraries/rcnn/' +-- local base_model = loadcaffe.load( +-- rcnnfold..'model-defs/pascal_finetune_deploy.prototxt', +-- rcnnfold..'data/caffe_nets/finetune_voc_2012_train_iter_70k', +-- 'cudnn') + + local rcnnfold = '/home/francisco/work/libraries/caffe/examples/imagenet/' + local base_model = loadcaffe.load( + rcnnfold..'imagenet_deploy.prototxt', + rcnnfold..'caffe_reference_imagenet_model', + 'cudnn') + + + for i=1,14 do + features:add(base_model:get(i):clone()) + end + for i=17,22 do + classifier:add(base_model:get(i):clone()) + end + classifier:add(nn.Linear(4096,21):cuda()) + + collectgarbage() + + else + local fold = 'data/models/imagenet_models/alexnet/' + local m1 = torch.load(fold..'features.t7') + local m2 = torch.load(fold..'top.t7') + + for i=1,14 do + features:add(m1:get(i):clone()) + end + features:get(3).padW = 1 + features:get(3).padH = 1 + features:get(7).padW = 1 + features:get(7).padH = 1 + + for i=2,7 do + classifier:add(m2:get(i):clone()) + end + local linear = nn.Linear(4096,21):cuda() + linear.weight:normal(0,0.01) + linear.bias:zero() + classifier:add(linear) + end + collectgarbage() + + local prl = nn.ParallelTable() + prl:add(features) + prl:add(nn.Identity()) + model:add(prl) + --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16)) + model:add(inn.ROIPooling(6,6):setSpatialScale(1/16)) + model:add(nn.View(-1):setNumInputDims(3)) + model:add(classifier) + +end +print(model) + +model:cuda() +parameters,gradParameters = model:getParameters() + +parameters2,gradParameters2 = model:parameters() + +lr = {0,0,1,2,1,2,1,2,1,2,1,2,1,2,1,2} +wd = {0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0} + +local function updateGPlrwd(clr) + local clr = clr or 1 + for i,p in pairs(gradParameters2) do + p:add(wd[i]*0.0005,parameters2[i]) + p:mul(lr[i]*clr) + end +end + +optimState = {learningRate = 1,--1e-3, + weightDecay = 0.000, momentum = 0.9, + learningRateDecay = 0, dampening=0} + +-------------------------------------------------------------------------- +-- training +-------------------------------------------------------------------------- + +confusion_matrix = optim.ConfusionMatrix(21) + + +model:training() + +savedModel = model:clone('weight','bias','running_mean','running_std') + +criterion = nn.CrossEntropyCriterion():cuda() +--criterion.nll.sizeAverage = false + +--normalize = true + +display_iter = 20 + +--inputs = {torch.CudaTensor(),torch.FloatTensor()} +inputs = {torch.CudaTensor(),torch.CudaTensor()} +target = torch.CudaTensor() + +learningRate = 1e-3 + +function train() + local err = 0 + for i=1,display_iter do + xlua.progress(i,display_iter) + inputs0,target0 = bp:getBatch(inputs0,target0) + inputs[1]:resize(inputs0[1]:size()):copy(inputs0[1]) + inputs[2]:resize(inputs0[2]:size()):copy(inputs0[2]) + target:resize(target0:size()):copy(target0) + local batchSize = target:size(1) + + local feval = function(x) + if x ~= parameters then + parameters:copy(x) + end + gradParameters:zero() + + local outputs = model:forward(inputs) + + local f = criterion:forward(outputs,target) + local df_do = criterion:backward(outputs,target) + + model:backward(inputs,df_do) + + -- mimic different learning rates per layer + -- without the cost of having a huge tensor + updateGPlrwd(learningRate) + + if normalize then + gradParameters:div(batchSize) + f = f/batchSize + end + + confusion_matrix:batchAdd(outputs,target) + + return f,gradParameters + end + + local x,fx = optim.sgd(feval,parameters,optimState) + err = err + fx[1] + end + print('Training error: '..err/display_iter) + return err/display_iter +end + +epoch_size = math.ceil(ds:size()/bp.imgs_per_batch) +stepsize = 30000--30000 +print_step = 10 +num_iter = 40000--40000 +num_iter = num_iter/display_iter--3000 + +confusion_matrix:zero() +train_err = {} +exp_name = 'frcnn_t11' + +paths.mkdir(paths.concat('cachedir',exp_name)) +--logger = optim.Logger(paths.concat('cachedir',exp_name,'train_err.log')) +train_acc = {} +for i=1,num_iter do + + if i%(stepsize/display_iter) == 0 then + --optimState.learningRate = optimState.learningRate/10 + learningRate = learningRate/10 + end + + --print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,optimState.learningRate)) + print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,learningRate)) + + local t_err = train() + table.insert(train_err,t_err) + + + if i%print_step == 0 then + print(confusion_matrix) + table.insert(train_acc,confusion_matrix.averageUnionValid*100) + gnuplot.epsfigure(paths.concat('cachedir',exp_name,'train_err.eps')) + gnuplot.plot('train',torch.Tensor(train_acc),'-') + gnuplot.xlabel('Iterations (200 batch update)') + gnuplot.ylabel('Training accuracy') + gnuplot.grid('on') + gnuplot.plotflush() + gnuplot.closeall() + + confusion_matrix:zero() + end + + if i%100 == 0 then + torch.save(paths.concat('cachedir',exp_name..'.t7'),savedModel) + end +end + +-- test +dsv = nnf.DataSetPascal{image_set='test', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data' + } + + +local fpv = {dataset=dsv} +tester = nnf.Tester_FRCNN(model,fpv) +tester.cachefolder = 'cachedir/'..exp_name +tester:test(num_iter)