Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions pkg/mybib/arxiv.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,59 @@
"""Fetch metadata from arXiv API."""

import sys
import requests
import xml.etree.ElementTree as ET

import requests


def fetch_arxiv_metadata(arxiv_id: str) -> dict:
    """Fetch metadata for a single paper from the arXiv API.

    Args:
        arxiv_id: arXiv identifier (e.g., '2301.00001')

    Returns:
        Dictionary with keys: title, authors, journal, year, doi, link.
        ``authors`` is a single comma-separated string, ``year`` is an int
        taken from the first four characters of the entry's published
        timestamp, ``doi`` falls back to ``arxiv_id`` and ``journal`` falls
        back to "arXiv" when the feed does not provide them.

    Raises:
        SystemExit: If the request fails, the API answers with a non-200
            status, or the feed contains no entry.
    """
    url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Transport errors are reported the same way as HTTP errors so the
        # documented SystemExit contract holds for every kind of API failure.
        print(f"Error fetching arxiv metadata: {exc}")
        sys.exit(1)

    if response.status_code != 200:
        print(f"Error fetching arxiv metadata: {response.status_code}")
        sys.exit(1)

    root = ET.fromstring(response.content)
    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "arxiv": "http://arxiv.org/schemas/atom",
    }

    entry = root.find("atom:entry", ns)
    if entry is None:
        print("No entry found for this arxiv ID.")
        sys.exit(1)

    # findtext() tolerates a missing element; split/join collapses the line
    # breaks and the indentation that follows them in wrapped titles.
    raw_title = entry.findtext("atom:title", default="", namespaces=ns)
    title = " ".join(raw_title.split())

    author_names = (
        author.findtext("atom:name", default="", namespaces=ns).strip()
        for author in entry.findall("atom:author", ns)
    )
    # Skip authors without a usable name instead of failing inside join().
    authors = ", ".join(name for name in author_names if name)

    published = entry.find("atom:published", ns).text
    year = int(published[:4])

    doi_elem = entry.find("arxiv:doi", ns)
    doi = doi_elem.text if doi_elem is not None else arxiv_id

    journal_elem = entry.find("arxiv:journal_ref", ns)
    journal = journal_elem.text if journal_elem is not None else "arXiv"

    return {
        "title": title,
        "authors": authors,
        "journal": journal,
        "year": year,
        "doi": doi,
        "link": f"https://arxiv.org/abs/{arxiv_id}",
    }
77 changes: 38 additions & 39 deletions pkg/mybib/bibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,75 +5,74 @@

def generate_bibtex(df: pd.DataFrame) -> str:
    """Generate BibTeX entries from a DataFrame of references.

    Column names are matched case-insensitively and ignoring surrounding
    whitespace; a ``url`` column is accepted as an alias for ``link``.
    Missing cells (None, NaN, pd.NA, or the literal text "nan") are treated
    as absent, so they never appear in the output and a row without a title
    is skipped.

    Args:
        df: DataFrame with columns: Title, Authors, Journal, Year, DOI, Link

    Returns:
        String containing BibTeX formatted entries separated by blank lines,
        or a BibTeX comment line when the DataFrame is empty.
    """
    if df.empty:
        return "% No references found\n"

    # Normalised header -> BibTeX field this function understands.
    aliases = {
        "title": "title",
        "authors": "authors",
        "journal": "journal",
        "year": "year",
        "doi": "doi",
        "link": "link",
        "url": "link",
    }

    # Map each understood field to the actual column label in ``df``.
    # str() keeps non-string labels (e.g. integers) from raising here.
    col_mapping = {}
    for col in df.columns:
        field = aliases.get(str(col).lower().strip())
        if field is not None:
            col_mapping[field] = col

    entries = []

    for _, row in df.iterrows():
        # Extract values using mapped column names
        title = _cell_text(row.get(col_mapping.get("title", "Title"), ""))
        authors = _cell_text(row.get(col_mapping.get("authors", "Authors"), ""))
        journal = _cell_text(row.get(col_mapping.get("journal", "Journal"), ""))
        year = _cell_text(row.get(col_mapping.get("year", "Year"), ""))
        doi = _cell_text(row.get(col_mapping.get("doi", "DOI"), ""))
        link = _cell_text(row.get(col_mapping.get("link", "Link"), ""))

        # Skip entries with missing critical fields
        if not title:
            continue

        # Use DOI as the key, or create one from title if DOI is missing
        if doi:
            key = doi
        else:
            key = title.lower().replace(" ", "_").replace(":", "")[:30]

        # Build BibTeX entry
        lines = [f"@article{{{key},\n", f" title={{{title}}},\n"]
        if authors:
            lines.append(f" author={{{authors}}},\n")
        if journal:
            lines.append(f" journal={{{journal}}},\n")
        if year:
            lines.append(f" year={{{year}}},\n")
        if link:
            lines.append(f" url={{{link}}}\n")
        lines.append("}\n")

        entries.append("".join(lines))

    return "\n".join(entries)


def _cell_text(value) -> str:
    """Return a DataFrame cell as stripped text, or "" when it is missing."""
    # is_scalar guards pd.isna, which returns an array for list-like cells.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    # A numeric column that contains NaN is stored as float, which would
    # otherwise render a year as "2020.0".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    text = str(value).strip()
    # "nan" is what str() produces for a NaN that was stringified upstream.
    return "" if text == "nan" else text
Loading