# -*- coding: utf-8 -*-
##********************************************************************************************************************************************************
##
##  Script to convert a BibTeX file into a reStructuredText (.rst) bibliography list.
##  Each BibTeX entry is transformed into one enumerated list item (wrapped over several lines) in the RST file following a citation
##  format with authors, year, journal, title, Bibcode, and ADS/arXiv/DOI URL.
##  Copyright (C) 2012 - 2024  Thomas Moeller
##
##  I. Physikalisches Institut, University of Cologne
##
##
##  Usage:
##
##      python3 convert_bibtex_to_rst.py bib/export-bibtex.bib out.txt
##
##
##
##  The following functions are included in this module:
##
##      - function clean_authors:       Formats the BibTeX author string for display.
##      - function wrap_entry:          wrap line of rst file
##      - function format_entry:        Converts a BibTeX entry dictionary into RST citation format.
##      - function bibtex_to_rst:       Reads a BibTeX file and writes the corresponding RST citations to output.
##      - function parse_args:          Processes command-line arguments for input/output filenames.
##
##
##
##  Versions of the program:
##
##  Who           When         What
##
##  T. Moeller    2025-09-01   initial version
##
##********************************************************************************************************************************************************


##--------------------------------------------------------------------------------------------------------------------------------------------------------
import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode
import argparse
import textwrap
##--------------------------------------------------------------------------------------------------------------------------------------------------------


##--------------------------------------------------------------------------------------------------------------------------------------------------------
## Format BibTeX author string to 'Last, F.' style, joined by comma.
def clean_authors(author_str):
    """
    Format the BibTeX author field for display.

    The field is split into persons at the BibTeX separator " and ". A person written as "Last, First" is rendered as
    "Last, F. M." (surrounding braces removed from the last name, exactly one blank after each period of the first-name
    part). Every other form is kept unchanged, apart from removing surrounding braces and blanks.

    Input:  BibTeX author field string.
    Output: formatted author names string, persons joined by ", ".
    """

    # Debug:
    # print ("author_str = ", author_str)


    ## collapse line breaks and runs of blanks, so that the separator " and " is also found if the author field
    ## is spread over several lines of the bib file
    normalized = ' '.join(author_str.split())


    formatted = []
    for person in normalized.split(' and '):
        parts = person.split(',')
        if len(parts) == 2:
            last = parts[0].strip('{}').strip()

            ## put a blank after each period; split / join then removes doubled and trailing blanks, which would
            ## otherwise show up in front of the separating comma ("Last, F. , Next, N. ")
            initials = ' '.join(parts[1].replace('.', '. ').split())
            formatted.append(f"{last}, {initials}")
        else:

            ## no (or more than one) comma: keep the name as given
            formatted.append(person.strip('{}').strip())


    ## set return parameter
    return ', '.join(formatted)
##--------------------------------------------------------------------------------------------------------------------------------------------------------


##--------------------------------------------------------------------------------------------------------------------------------------------------------
## wrap line of rst file
def wrap_entry(formatted_line, max_character = 85):
    """
    Wrap one line of the rst file on word boundaries.

    LaTeX style quotes and a set of journal macros are replaced by plain text before the line is wrapped, so that the
    wrapping is computed for the text which is actually written to the rst file.

    Input:  the unwrapped line
            the max. number of characters per line (the three blanks, which are used to indent continuation lines,
            are added on top of this number)
    Output: list of wrapped lines (after wrapping on word boundaries), all lines except the first one are prefixed
            with "   "
    """

    # Debug:
    # print ("formatted_line = ", formatted_line)
    # print ("max_character = ", max_character)


    ## the order matters: longer macros (e.g. "\apjs") have to be replaced before their prefixes (e.g. "\apj")
    replacements = (("``", chr(34)),
                    ("''", chr(34)),
                    ("\\apjs", "ApJS"),
                    ("\\apjl", "ApJL"),
                    ("\\apj", "ApJ"),
                    ("\\aap", "A&A"),
                    ("åp", "A&A"),
                    ("\\aj", "AJ"),
                    ("\\araa", "Annual Review of Astronomy and Astrophysics"),
                    ("\\memsai", "Memorie della Società Astronomica Italiana"),
                    ("\\nat", "Nature"),
                    ("\\mnras", "MNRAS"),
                    ("\\pasp", "PASP"))


    ## replace before wrapping: some replacements are much longer than the macro and would otherwise push an already
    ## wrapped line beyond the max. number of characters
    for old_text, new_text in replacements:
        formatted_line = formatted_line.replace(old_text, new_text)


    ## Use textwrap to wrap the line at word boundaries
    wrapped_lines = textwrap.wrap(formatted_line, width = max_character)


    ## Prefix all lines except the first one with "   "
    formatted_entries = []
    for i, wrapped_line in enumerate(wrapped_lines):
        local_line = wrapped_line.strip()
        if (i > 0):
            local_line = "   " + local_line
        formatted_entries.append(local_line)

    # Debug:
    # print ("formatted_entries = ", formatted_entries)


    ## set return parameter
    return formatted_entries
##--------------------------------------------------------------------------------------------------------------------------------------------------------


##--------------------------------------------------------------------------------------------------------------------------------------------------------
## Convert a BibTeXParser entry to an RST citation line.
def format_entry(entry):
    """
    Convert one BibTeX entry into the lines of an rst list item.

    The item has the form "#. authors (year), journal, title, volume, number, pages (or eid)", wrapped by wrap_entry.
    Fields which are missing in the entry are skipped, so that no empty ", ," sequences are written. The wrapped
    citation is followed by a line with the entry ID linked to the ADS url (fallbacks: arXiv url built from "eprint",
    doi.org url built from "doi") and by a "|br| |br|" line.

    Input:  BibTeX entry dict.
    Output: list of formatted RST lines.
    """

    # Debug:
    # print ("entry = ", entry)


    def cleaned_field(key):
        ## get field without curly brackets and surrounding blanks, empty string if the field is missing
        return entry.get(key, '').replace('{', '').replace('}', '').strip()


    ## strip guards against stray blanks around the author list
    authors_formatted = clean_authors(entry.get('author', '')).strip()
    year = entry.get('year', '?')


    ## the pages are used unless they are missing or just repeat the eid
    pages = cleaned_field('pages')
    eid = cleaned_field('eid')
    if pages and pages != eid:
        location = pages
    else:
        location = eid


    ## collect the parts of the citation and skip everything which is not defined
    head_parts = ["#.", authors_formatted, f"({year})"]
    citation_parts = [' '.join(part for part in head_parts if part),
                      cleaned_field('journal'),
                      cleaned_field('title'),
                      cleaned_field('volume'),
                      cleaned_field('number'),
                      location]
    formatted_line = ', '.join(part for part in citation_parts if part)


    ## determine label and target of the link: ADS url, arXiv url, or DOI url
    bibcode = entry.get('ID', entry.get('bibcode', entry.get('doi', '')))
    adsurl = entry.get('adsurl', '')
    arxiv_id = entry.get('eprint', '')
    doi = entry.get('doi', '').strip()
    if not adsurl and arxiv_id:
        adsurl = f"https://arxiv.org/abs/{arxiv_id}"
    elif not adsurl and doi:
        adsurl = f"https://doi.org/{doi}"


    ## construct line of rst file
    entry_rst = wrap_entry(formatted_line)
    if adsurl:
        entry_rst.append(f"   `{bibcode} <{adsurl}>`__.")
    elif bibcode:

        ## no url known: write the plain ID instead of a hyperlink without target
        entry_rst.append(f"   {bibcode}.")
    entry_rst.append("   |br| |br|\n")

    # Debug:
    # print ("entry_rst = ", entry_rst)


    ## set return parameter
    return entry_rst
##--------------------------------------------------------------------------------------------------------------------------------------------------------


##--------------------------------------------------------------------------------------------------------------------------------------------------------
## Convert all BibTeX entries in a file to RST; each entry becomes one wrapped list item followed by its link line.
def bibtex_to_rst(bib_filename, rst_filename):
    """
    Read a BibTeX file and write the corresponding RST bibliography.

    The bib file is parsed with bibtexparser (with the convert_to_unicode customization), every entry is converted by
    format_entry, and all resulting lines are written to the rst file, each followed by a line break.

    Input:  bibtex and rst filenames.
    Output: writes formatted RST bibliography.
    """

    # Debug:
    # print ("bib_filename = ", bib_filename)
    # print ("rst_filename = ", rst_filename)


    ## read the bib file with an explicit encoding: the rst file is written as utf-8 as well, and the platform default
    ## encoding is not utf-8 on every system
    with open(bib_filename, encoding = 'utf-8') as bibtex_file:
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.load(bibtex_file, parser=parser)


    ## collect the lines of all entries
    entries_rst = []
    for entry in bib_database.entries:
        entries_rst.extend(format_entry(entry))

    # Debug:
    # print ("entries_rst = ", entries_rst)


    ## write lines to rst file
    with open(rst_filename, 'w', encoding = 'utf-8') as f:
        f.writelines(rst + '\n' for rst in entries_rst)


    ## we're done
    return
##--------------------------------------------------------------------------------------------------------------------------------------------------------


##--------------------------------------------------------------------------------------------------------------------------------------------------------
## Parse command-line arguments for input/output filenames.
def parse_args():
    """
    Read the names of the input and output file from the command line.

    Returns: Tuple with bibfile and rstfile strings, rstfile is "out.rst" if no output file is specified.
    """
    cli = argparse.ArgumentParser(description="Convert BibTeX file to reStructuredText bibliography.")
    cli.add_argument("bibfile", help="Filename for BibTeX input.")
    cli.add_argument("rstfile", nargs="?", help="Filename for RST output.")
    options = cli.parse_args()


    ## the output filename is optional, use default name if it is not given
    output_name = options.rstfile
    if output_name is None:
        output_name = "out.rst"


    ## set return parameters
    return options.bibfile, output_name
##--------------------------------------------------------------------------------------------------------------------------------------------------------


##--------------------------------------------------------------------------------------------------------------------------------------------------------
## Main block to handle command line and run conversion.
if __name__ == "__main__":


    ## analyze command line arguments and convert bib file (parse_args returns input and output filename in the
    ## order expected by bibtex_to_rst)
    bibtex_to_rst(*parse_args())


## we're done
##--------------------------------------------------------------------------------------------------------------------------------------------------------
##--------------------------------------------------------------------------------------------------------------------------------------------------------
##--------------------------------------------------------------------------------------------------------------------------------------------------------
