/**
 * Returns true when a solution pair carries a street classification.
 * The check is done on the constructor name, exactly as the solver expects.
 */
function isStreetPair (pair) {
  return pair.classification.constructor.name === 'StreetClassification'
}

/**
 * Collects every 'child' node of every section graph that does not
 * intersect any span already used by the given solution pairs.
 */
function collectUnusedChildren (sections, pairs) {
  const unused = []
  for (const section of sections) {
    for (const child of section.graph.findAll('child')) {
      if (!pairs.some(pair => pair.span.intersects(child))) {
        unused.push(child)
      }
    }
  }
  return unused
}

/**
 * Solver pass that grows street spans over an adjacent token that the
 * solution left unused (e.g. 'Rue Des Petits' + 'Carreaux').
 *
 * A street is extended only when all of the following hold:
 *  - the unused token starts right after the street span
 *    (`street.span.end === missing.start - 1`; presumably this assumes a
 *    single separator character between tokens - TODO confirm),
 *  - the first child of the street span has a StreetPrefixClassification,
 *  - the unused token is not classified as an end token,
 *  - the prefix has a parent phrase that starts at the prefix and ends at
 *    the unused token.
 */
class FillSolver {
  /**
   * Extends street spans in place for every solution of the tokenizer.
   *
   * @param {object} tokenizer - exposes `solution` (array of solutions, each
   *   holding a `pair` array of `{ span, classification }`) and `section`
   *   (array of sections, each holding a `graph`).
   * @returns {void} nothing; matching pairs get their `span` replaced.
   */
  solve (tokenizer) {
    tokenizer.solution.forEach(solution => {
      // Streets of this solution; nothing to fill when there is none.
      const streets = solution.pair.filter(isStreetPair)
      if (streets.length === 0) { return }

      // Tokens not covered by the solution (computed before any span is replaced).
      const missings = collectUnusedChildren(tokenizer.section, solution.pair)

      missings.forEach(missing => {
        // Reads the current span, so a street extended by a previous token
        // can be extended again by the next one.
        const street = streets.find(s => s.span.end === missing.start - 1)
        if (!street) { return }

        const prefix = street.span.graph.findOne('child:first')
        if (!prefix || !prefix.classifications.StreetPrefixClassification) { return }

        // The missing token must not be an end token.
        if (missing.classifications.EndTokenClassification) { return }

        const span = prefix.graph
          .findAll('parent')
          .find(phrase => phrase.start === prefix.start && phrase.end === missing.end)

        // `street` is the very object stored in solution.pair, so it can be
        // updated directly; no index lookup or bounds check is needed.
        if (span) {
          street.span = span
        }
      })
    })
  }
}