From f7c155f05963ef1e16f7c0efc7d2a367b2e17bf0 Mon Sep 17 00:00:00 2001 From: Joxit Date: Sat, 25 May 2019 07:31:28 +0200 Subject: [PATCH 1/2] feat(FillSolver): Add FillSolver, it will add unclassified tokens to the street MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used only when StreetPrefixClassification is used. Remove Paris from regions and add cité in street_types. Paris is always used as a locality --- classifier/scheme/street_name.js | 18 ++++++++++++++++++ parser/AddressParser.js | 4 +++- solver/FillSolver.js | 23 +++++++++++++++++++++++ test/address.fra.test.js | 8 ++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 solver/FillSolver.js diff --git a/classifier/scheme/street_name.js b/classifier/scheme/street_name.js index 2e2ce0c6..96ce9296 100644 --- a/classifier/scheme/street_name.js +++ b/classifier/scheme/street_name.js @@ -53,6 +53,24 @@ module.exports = [ } ] }, + { + // du 4 septembre + confidence: 0.5, + Class: StreetNameClassification, + scheme: [ + { + is: ['StopWordClassification'] + }, + { + is: ['NumericClassification'], + not: ['PostcodeClassification'] + }, + { + is: ['AlphaClassification'], + not: ['StreetClassification', 'IntersectionClassification', 'LocalityClassification'] + } + ] + }, { // dos Fiéis de Deus confidence: 0.5, diff --git a/parser/AddressParser.js b/parser/AddressParser.js index 74edebd1..1f7a917e 100644 --- a/parser/AddressParser.js +++ b/parser/AddressParser.js @@ -29,6 +29,7 @@ const TokenDistanceFilter = require('../solver/TokenDistanceFilter') const MustNotPreceedFilter = require('../solver/MustNotPreceedFilter') const MustNotFollowFilter = require('../solver/MustNotFollowFilter') const SubsetFilter = require('../solver/SubsetFilter') +const FillSolver = require('../solver/FillSolver') class AddressParser extends Parser { constructor (options) { @@ -108,7 +109,8 @@ class AddressParser extends Parser { new 
MustNotFollowFilter('LocalityClassification', 'RegionClassification'), new MustNotFollowFilter('LocalityClassification', 'CountryClassification'), new TokenDistanceFilter(), - new SubsetFilter() + new SubsetFilter(), + new FillSolver() ], options ) diff --git a/solver/FillSolver.js b/solver/FillSolver.js new file mode 100644 index 00000000..b0bceb47 --- /dev/null +++ b/solver/FillSolver.js @@ -0,0 +1,23 @@ +class FillSolver { + solve (tokenizer) { + tokenizer.solution.forEach(solution => { + // Get streets from this solution + const streets = solution.pair.filter(p => p.classification.constructor.name === 'StreetClassification') + // Get all nodes that are not in the solution + const missings = tokenizer.section[0].graph.edges.child.filter(c => !solution.pair.some(p => p.span.intersects(c))) + + missings.forEach(missing => { + const street = streets.find(s => s.span.end === missing.start - 1) + const prefix = street && street.span.graph.findOne('child:first') + if (prefix && prefix.classifications.StreetPrefixClassification) { + street.span.setBody(`${street.span.body} ${missing.body}`) + street.span.graph.add('child', missing) + street.span.graph.remove('child:last', street.span.graph.findOne('child:last')) + street.span.graph.add('child:last', missing) + } + }) + }) + } +} + +module.exports = FillSolver diff --git a/test/address.fra.test.js b/test/address.fra.test.js index 50de25e0..4c965e6d 100644 --- a/test/address.fra.test.js +++ b/test/address.fra.test.js @@ -116,6 +116,14 @@ const testcase = (test, common) => { assert(`Rue de l'Adjudant Réau Paris`, [ { street: `Rue de l'Adjudant Réau` }, { locality: 'Paris' } ]) + + assert('16 Rue Des Petits Carreaux', [ + { housenumber: '16' }, { street: 'Rue Des Petits Carreaux' } + ]) + + assert('16 Rue Des Petits Carreaux Paris', [ + { housenumber: '16' }, { street: 'Rue Des Petits Carreaux' }, { locality: 'Paris' } + ]) } module.exports.all = (tape, common) => { From a9811b52f2b5db8c2bd27f0c3f5064344422cf31 Mon Sep 17 
00:00:00 2001 From: Joxit Date: Tue, 9 Jul 2019 14:24:25 +0200 Subject: [PATCH 2/2] fix(FillSolver): don't alter spans Now I replace the solution with the correct phrase --- solver/FillSolver.js | 18 ++++++++++++------ test/address.fra.test.js | 14 +++++++++++++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/solver/FillSolver.js b/solver/FillSolver.js index b0bceb47..eb8ab342 100644 --- a/solver/FillSolver.js +++ b/solver/FillSolver.js @@ -4,16 +4,22 @@ class FillSolver { // Get streets from this solution const streets = solution.pair.filter(p => p.classification.constructor.name === 'StreetClassification') // Get all nodes that are not in the solution - const missings = tokenizer.section[0].graph.edges.child.filter(c => !solution.pair.some(p => p.span.intersects(c))) + const missings = tokenizer.section.reduce((acc, section) => { + return acc.concat(section.graph.findAll('child').filter(c => !solution.pair.some(p => p.span.intersects(c)))) + }, []) + // For each missing span, check whether it directly follows a street that starts with a street prefix and, if so, extend the street in the solution + // The missing span should not be an end token missings.forEach(missing => { const street = streets.find(s => s.span.end === missing.start - 1) const prefix = street && street.span.graph.findOne('child:first') - if (prefix && prefix.classifications.StreetPrefixClassification) { - street.span.setBody(`${street.span.body} ${missing.body}`) - street.span.graph.add('child', missing) - street.span.graph.remove('child:last', street.span.graph.findOne('child:last')) - street.span.graph.add('child:last', missing) + + if (prefix && prefix.classifications.StreetPrefixClassification && !missing.classifications.EndTokenClassification) { + const span = prefix.graph.findAll('parent').find(phrase => phrase.start === prefix.start && phrase.end === missing.end) + const streetIndex = solution.pair.indexOf(street) + if (span && streetIndex < solution.pair.length) { + solution.pair[streetIndex].span = span + } } }) }) diff --git
a/test/address.fra.test.js b/test/address.fra.test.js index 4c965e6d..bbde5614 100644 --- a/test/address.fra.test.js +++ b/test/address.fra.test.js @@ -118,12 +118,24 @@ const testcase = (test, common) => { ]) assert('16 Rue Des Petits Carreaux', [ - { housenumber: '16' }, { street: 'Rue Des Petits Carreaux' } + { housenumber: '16' }, { street: 'Rue Des Petits' } ]) assert('16 Rue Des Petits Carreaux Paris', [ { housenumber: '16' }, { street: 'Rue Des Petits Carreaux' }, { locality: 'Paris' } ]) + + assert('Rue Saint-Germain Dou', [ + { street: 'Rue Saint-Germain' } + ]) + + assert('Rue Saint-Germain Dourdan', [ + { street: 'Rue Saint-Germain' }, { locality: 'Dourdan' } + ]) + + assert('Rue du 8 Mai Chevreuse', [ + { street: 'Rue du 8 Mai' }, { locality: 'Chevreuse' } + ]) } module.exports.all = (tape, common) => {