From 2d2dccc413204fd17f19aa7840bdfa7a124455ea Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Fri, 23 Apr 2021 11:12:32 +0200 Subject: [PATCH 1/7] Preserve comments etc. in a bibtex file The script does not create a new database but adds to the entries of the original; this preserves more information, e.g., comments. I have also avoided unnecessary database loading, and switched terminology from 'biblatex' to 'bibtex'. --- selfcites/selfcites.py | 59 +++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/selfcites/selfcites.py b/selfcites/selfcites.py index 13eb326..580d35f 100644 --- a/selfcites/selfcites.py +++ b/selfcites/selfcites.py @@ -6,8 +6,8 @@ Dialectica open access initiative self-citing bibliography file check, (c) Thomas Hodgson 2021 MIT License -The script takes paths as arguments. The assumption is that these are biblatex files. -It looks for biblatex cite commands, and compares these to the biblatex entries in the files. +The script takes paths as arguments. The assumption is that these are bibtex files. +It looks for bibtex cite commands, and compares these to the bibtex entries in the files. The following will be printed, unless the --quiet option is set: - the keys used in citation commands (if any) @@ -15,7 +15,7 @@ - the entries in the file If the option --source argument is given, the script tries to use bibtexparser to make an extended -biblatex file with missing entries added from the argument to --source. +bibtex file with missing entries added from the argument to --source. The new file will have the value of the optional argument --suffix added to the name of the original file. The default value of --suffix is '_extended'. @@ -45,18 +45,20 @@ ) group = parser.add_mutually_exclusive_group() group.add_argument( - "--verbose", "-v", + "--verbose", + "-v", help="Print information about what the script does. 
Double for full verbosity.", action="count", default=1, - dest="verbose" + dest="verbose", ) group.add_argument( - "--quiet", "-q", + "--quiet", + "-q", help="Do not print information about what the script does", action="store_const", const=0, - dest="verbose" + dest="verbose", ) parser.add_argument( "bibliographies", @@ -64,25 +66,30 @@ nargs="*", ) parser.add_argument( - "--source", "-s", + "--source", + "-s", help="A bibliography file to get missing entries from", ) parser.add_argument( - "--suffix", "-x", + "--suffix", + "-x", help="Add this suffix to new bibliography files", default="_extended", ) parser.add_argument( - "--directory", "-d", - help="Look in this directory for biblatex files", + "--directory", + "-d", + help="Look in this directory for bibtex files", ) parser.add_argument( - "--extension", "-e", + "--extension", + "-e", help="The extension for bibliography files", default="bib", ) parser.add_argument( - "--recursive", "-r", + "--recursive", + "-r", help="Look recursively in subdirectories of the directory specified by --directory", action="store_true", ) @@ -92,7 +99,10 @@ if args.source: import bibtexparser from bibtexparser.bwriter import BibTexWriter - from bibtexparser.bibdatabase import BibDatabase + + # Use bibtexparser to get a database from the file + with open(args.source, "r") as in_file: + source_database = bibtexparser.load(in_file) # Print script information message if args.verbose >= 1: @@ -102,7 +112,7 @@ sep="\n", ) -# Make a list of biblatex files to run on +# Make a list of bibtex files to run on # Take the positional arguments bibliographies = args.bibliographies # Add what is in the specified directory @@ -195,20 +205,15 @@ # Try to get the missing entries from the source bibliography if args.source and missing: - # Use bibtexparser to get a dictionary from the file - with open(args.source, "r") as in_file: - source_database = bibtexparser.load(in_file) - # Use bibtexparser to get a dictionary from the file + # Use 
bibtexparser to get a database from the file with open(current_file, "r") as in_file: current_database = bibtexparser.load(in_file) # A list of the missing entries, represented as dictionaries missing_entries = [ entry for entry in source_database.entries if entry["ID"] in missing ] - # Create a new database, and add the entries from the current database - # as well as the missing entries - extended_database = BibDatabase() - extended_database.entries = current_database.entries + missing_entries + # Add the missing entries to the current database + current_database.entries += missing_entries # Write to a file writer = BibTexWriter() @@ -217,15 +222,17 @@ root, ext = os.path.splitext(tail) new_file = os.path.join(head, root + args.suffix + ext) with open(new_file, "w") as out_file: - out_file.write(writer.write(extended_database)) + out_file.write(writer.write(current_database)) if args.verbose >= 1: # Print a message about what was written print( - "I looked for any missing entries in the source bibliography: '{}'.".format(args.source), + "I looked for any missing entries in the source bibliography: '{}'.".format( + args.source + ), "I wrote an extended bibliography file: '{}'.".format(new_file), sep="\n", ) except FileNotFoundError: - if args.verbose >=1: + if args.verbose >= 1: print("I couldn't find {}.".format(current_file)) From 4eb7208dccf62decf1bb37e6ad0f465ae898125f Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Sun, 25 Apr 2021 13:32:58 +0200 Subject: [PATCH 2/7] typo --- selfcites/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selfcites/README.md b/selfcites/README.md index 08d95a0..3c477b0 100644 --- a/selfcites/README.md +++ b/selfcites/README.md @@ -35,7 +35,7 @@ the present working one, you should specify their paths, e.g.: python3 ../resources/scripts/selfcites.py myarticle/mybibliography.bib ``` -### Cecking several BibTeX files +### Checking several BibTeX files You can specify several files, or all BibTeX files at 
a given location: From b9e4507cb551040965825ad93ed86f622021a314 Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Mon, 26 Apr 2021 12:35:14 +0200 Subject: [PATCH 3/7] fix PHONY --- selfcites/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selfcites/Makefile b/selfcites/Makefile index 0cbd8cc..8760047 100644 --- a/selfcites/Makefile +++ b/selfcites/Makefile @@ -1,6 +1,6 @@ -PHONY: test +.PHONY: test test: sample_completed.bib sample_completed.bib: selfcites.py sample.bib source.bib - @python3 selfcites.py sample.bib --source source.bib --suffix _completed \ No newline at end of file + @python3 selfcites.py sample.bib --source source.bib --suffix _completed From 11f03e8c3540e2d95dfd30d5b97aae32f3b50439 Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Mon, 26 Apr 2021 12:38:01 +0200 Subject: [PATCH 4/7] use custom parsers This should avoid warnings about 'nonstandard types' from bibtexparser --- selfcites/selfcites.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/selfcites/selfcites.py b/selfcites/selfcites.py index 580d35f..ba3beba 100644 --- a/selfcites/selfcites.py +++ b/selfcites/selfcites.py @@ -98,11 +98,14 @@ # if a --source option is provided, we need bibtexparser if args.source: import bibtexparser + from bibtexparser.bparser import BibTexParser from bibtexparser.bwriter import BibTexWriter + source_parser = BibTexParser(ignore_nonstandard_types=False) + # Use bibtexparser to get a database from the file with open(args.source, "r") as in_file: - source_database = bibtexparser.load(in_file) + source_database = bibtexparser.load(in_file, source_parser) # Print script information message if args.verbose >= 1: @@ -207,7 +210,8 @@ if args.source and missing: # Use bibtexparser to get a database from the file with open(current_file, "r") as in_file: - current_database = bibtexparser.load(in_file) + current_parser = BibTexParser(ignore_nonstandard_types=False) + current_database = 
bibtexparser.load(in_file, current_parser) # A list of the missing entries, represented as dictionaries missing_entries = [ entry for entry in source_database.entries if entry["ID"] in missing From bb75f93d0eec46941b58f93242b2442615227c59 Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Mon, 26 Apr 2021 12:39:41 +0200 Subject: [PATCH 5/7] Update sample.bib Added '@online' type, to test --- selfcites/sample.bib | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/selfcites/sample.bib b/selfcites/sample.bib index 77d9ee8..1f2133b 100644 --- a/selfcites/sample.bib +++ b/selfcites/sample.bib @@ -1,5 +1,12 @@ % Encoding: UTF-8 +@online{johnson:2017, + title = {Compositionality}, + author = {Johnson, Michael}, + date = {2017-12-14}, + url = {http://www.iep.utm.edu/composit/}, + urldate = {2017-12-14} +} @book{vanbenthem:1986, address = {Dordrecht}, From 407bd5fea97bba07fb005d9cf012d0324864a0cc Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Mon, 26 Apr 2021 12:40:39 +0200 Subject: [PATCH 6/7] Update source.bib Added '@online' type, to test --- selfcites/source.bib | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/selfcites/source.bib b/selfcites/source.bib index 9fea210..bceced3 100644 --- a/selfcites/source.bib +++ b/selfcites/source.bib @@ -1,5 +1,13 @@ % Encoding: UTF-8 +@online{johnson:2017, + title = {Compositionality}, + author = {Johnson, Michael}, + date = {2017-12-14}, + url = {http://www.iep.utm.edu/composit/}, + urldate = {2017-12-14} +} + @book{geach_pt:1980, address = {Ithaca, New York}, author = {Geach, Peter Thomas}, From 8fcbeee9e7080e7f70ba209da5e4b24b75e3b04b Mon Sep 17 00:00:00 2001 From: Thomas Hodgson Date: Mon, 26 Apr 2021 12:45:45 +0200 Subject: [PATCH 7/7] Update sample_completed.bib Added '@online' type, to test --- selfcites/sample_completed.bib | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/selfcites/sample_completed.bib b/selfcites/sample_completed.bib index 2aa6ad9..c12836a 100644 --- 
a/selfcites/sample_completed.bib +++ b/selfcites/sample_completed.bib @@ -1,3 +1,7 @@ +@comment{% Encoding: UTF-8} + +@comment{jabref-meta: databaseType:bibtex;} + @book{aloni-etal:2010, address = {Berlin}, author = {Aloni, Maria and Bastiaanse, Harald and @@ -217,6 +221,14 @@ @article{jaskowski:1934 year = {1934} } +@online{johnson:2017, + author = {Johnson, Michael}, + date = {2017-12-14}, + title = {Compositionality}, + url = {http://www.iep.utm.edu/composit/}, + urldate = {2017-12-14} +} + @book{klemke_ed:1970, address = {Urbana, Illinois}, booktitle = {Essays on Russell}, @@ -563,4 +575,3 @@ @book{vonheusinger-etal:2011 Language Meaning. Volume 2}, year = {2011} } -