Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions pkg/mybib/arxiv.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,54 @@
"""Fetch metadata from arXiv API."""

import sys
import requests
import xml.etree.ElementTree as ET

import requests


def fetch_arxiv_metadata(arxiv_id: str) -> dict:
"""Fetch metadata from arXiv API.

Args:
arxiv_id: arXiv identifier (e.g., '2301.00001')

Returns:
Dictionary with keys: title, authors, journal, year, doi, link, arxiv_id

Raises:
SystemExit: If API call fails or no entry found
"""
url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
response = requests.get(url)

if response.status_code != 200:
print(f"Error fetching arxiv metadata: {response.status_code}")
sys.exit(1)

root = ET.fromstring(response.content)
ns = {
'atom': 'http://www.w3.org/2005/Atom',
'arxiv': 'http://arxiv.org/schemas/atom'
"atom": "http://www.w3.org/2005/Atom",
"arxiv": "http://arxiv.org/schemas/atom",
}
entry = root.find('atom:entry', ns)

entry = root.find("atom:entry", ns)
if entry is None:
print("No entry found for this arxiv ID.")
sys.exit(1)

title = entry.find('atom:title', ns).text.strip().replace('\n', ' ')
authors = ', '.join(
author.find('atom:name', ns).text
for author in entry.findall('atom:author', ns)

title = entry.find("atom:title", ns).text.strip().replace("\n", " ")
authors = ", ".join(
author.find("atom:name", ns).text for author in entry.findall("atom:author", ns)
)
published = entry.find('atom:published', ns).text
published = entry.find("atom:published", ns).text
year = int(published[:4])
doi_elem = entry.find('arxiv:doi', ns)

doi_elem = entry.find("arxiv:doi", ns)
doi = doi_elem.text if doi_elem is not None else arxiv_id
journal_elem = entry.find('arxiv:journal_ref', ns)

journal_elem = entry.find("arxiv:journal_ref", ns)
journal = journal_elem.text if journal_elem is not None else "arXiv"

return {
"title": title,
"authors": authors,
Expand Down
77 changes: 38 additions & 39 deletions pkg/mybib/bibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,75 +5,74 @@

def _clean_cell(value) -> str:
    """Return a DataFrame cell as stripped text, mapping missing values to ""."""
    # pd.isna is only well-defined for scalars; anything list-like falls
    # through to plain str() conversion below.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    text = str(value).strip()
    # "nan" is what str() produces for a float NaN that was already
    # stringified upstream; keep treating it as a missing value.
    return "" if text == "nan" else text


def generate_bibtex(df: pd.DataFrame) -> str:
    """Generate BibTeX entries from a DataFrame of references.

    Column names are matched case-insensitively and ignoring surrounding
    whitespace. A column named ``url`` is accepted as an alias for ``link``.
    Missing cells (None, NaN, NA, or the literal text "nan") are treated as
    empty for every field, so they never appear in the output.

    Args:
        df: DataFrame with columns: Title, Authors, Journal, Year, DOI, Link

    Returns:
        String containing BibTeX formatted entries, or a single BibTeX
        comment line when the DataFrame is empty. Rows without a title
        are skipped.
    """
    if df.empty:
        return "% No references found\n"

    # Normalised column name -> logical field name.
    aliases = {
        "title": "title",
        "authors": "authors",
        "journal": "journal",
        "year": "year",
        "doi": "doi",
        "link": "link",
        "url": "link",
    }

    # Logical field name -> actual column label. When several columns map
    # to the same field, the last one wins.
    col_mapping = {}
    for col in df.columns:
        # str() so that non-string column labels do not crash .lower().
        field = aliases.get(str(col).lower().strip())
        if field is not None:
            col_mapping[field] = col

    def raw_cell(row, field):
        """Return the raw cell for a logical field, or "" if the column is absent."""
        column = col_mapping.get(field)
        return "" if column is None else row.get(column, "")

    entries = []

    for _, row in df.iterrows():
        title = _clean_cell(raw_cell(row, "title"))

        # Skip entries with missing critical fields
        if not title:
            continue

        authors = _clean_cell(raw_cell(row, "authors"))
        journal = _clean_cell(raw_cell(row, "journal"))
        doi = _clean_cell(raw_cell(row, "doi"))
        link = _clean_cell(raw_cell(row, "link"))

        # pandas upcasts an integer column containing NaN to float, which
        # would otherwise render the year as e.g. "2020.0".
        raw_year = raw_cell(row, "year")
        if isinstance(raw_year, float) and raw_year.is_integer():
            raw_year = int(raw_year)
        year = _clean_cell(raw_year)

        # Use DOI as the key, or create one from title if DOI is missing
        if doi:
            key = doi
        else:
            key = title.lower().replace(" ", "_").replace(":", "")[:30]

        # Build BibTeX entry
        lines = [f"@article{{{key},\n", f" title={{{title}}},\n"]
        if authors:
            lines.append(f" author={{{authors}}},\n")
        if journal:
            lines.append(f" journal={{{journal}}},\n")
        if year:
            lines.append(f" year={{{year}}},\n")
        if link:
            lines.append(f" url={{{link}}}\n")
        lines.append("}\n")

        entries.append("".join(lines))

    return "\n".join(entries)
52 changes: 29 additions & 23 deletions pkg/mybib/categories.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
"""Category management for bibliography."""

import json
from pathlib import Path
from typing import Dict, List, Tuple


def load_categories(file_path: str = "categories.json") -> Dict[str, str]:
"""Load category mappings from file.

Args:
file_path: Path to categories JSON file

Returns:
Dictionary mapping category ID to category name
"""
Expand All @@ -22,9 +21,11 @@ def load_categories(file_path: str = "categories.json") -> Dict[str, str]:
return {}


def save_categories(categories: Dict[str, str], file_path: str = "categories.json") -> None:
def save_categories(
categories: Dict[str, str], file_path: str = "categories.json"
) -> None:
"""Save category mappings to file.

Args:
categories: Dictionary mapping category ID to category name
file_path: Path to categories JSON file
Expand All @@ -33,65 +34,70 @@ def save_categories(categories: Dict[str, str], file_path: str = "categories.jso
json.dump(categories, f, indent=2, sort_keys=True)


def get_or_create_category(
    name: str, categories: Dict[str, str] = None
) -> Tuple[str, Dict[str, str]]:
    """Get category ID for given name, creating if needed.

    Names are compared case-insensitively and ignoring surrounding
    whitespace, so "Physics", " physics " and "PHYSICS" share one ID.
    When no match exists, the new category gets the next integer ID after
    the highest numeric ID present (or "1" if there is none) and is stored
    under its whitespace-stripped name.

    Args:
        name: Category name
        categories: Existing categories dict (loads from file if not provided).
            The dict is updated in place when a category is created.

    Returns:
        Tuple of (category_id, updated_categories_dict)
    """
    if categories is None:
        categories = load_categories()

    display_name = name.strip()
    normalized = display_name.lower()

    # Check if category already exists (case- and whitespace-insensitive)
    for cat_id, cat_name in categories.items():
        if cat_name.strip().lower() == normalized:
            return cat_id, categories

    # isdecimal() matches exactly what int() can parse (isdigit() also
    # accepts characters such as superscripts that int() rejects), and
    # default=0 covers dicts that hold only non-numeric IDs.
    highest = max(
        (int(cat_id) for cat_id in categories if cat_id.isdecimal()),
        default=0,
    )
    new_id = str(highest + 1)
    categories[new_id] = display_name

    return new_id, categories


def list_categories(categories: Dict[str, str] = None) -> List[Tuple[str, str]]:
    """List all categories sorted by ID.

    Numeric IDs are ordered by integer value; every other ID sorts after
    them, keeping its relative order from the input dict.

    Args:
        categories: Category mapping dict (loads from file if not provided)

    Returns:
        List of (id, name) tuples sorted by ID
    """
    if categories is None:
        categories = load_categories()

    def sort_key(item):
        cat_id = item[0]
        # isdecimal() guards int(): isdigit() also accepts characters such
        # as "²" that int() cannot parse.
        return int(cat_id) if cat_id.isdecimal() else float("inf")

    return sorted(categories.items(), key=sort_key)


def get_category_name(cat_id: str, categories: Dict[str, str] = None) -> str:
    """Look up the display name of a category.

    Args:
        cat_id: Category ID; converted with str() before the lookup
        categories: Category mapping dict (loads from file if not provided)

    Returns:
        Category name, or empty string if not found
    """
    mapping = load_categories() if categories is None else categories
    lookup_key = str(cat_id)
    return mapping.get(lookup_key, "")
Loading