Skip to content Skip to sidebar Skip to footer

How Do I Convert A .dbf File Into A Pandas DataFrame?

I have a .dbf file that I would like to convert into a pandas DataFrame, but pandas has no built-in way to read the .dbf format directly.

Solution 1:

Using my dbf library, the following function will do the job:

def dbf_to_dataframe(filename):
    """
    Convert the dbf table at `filename` into a pandas DataFrame.

    Field names are preserved and every supported dbf field type is mapped
    to a pandas dtype:

    * B, F, Y and N fields with decimals -> ``float64``
    * I and N fields without decimals    -> ``int64`` (``Int64`` when the
      column holds empty values, because ``int64`` cannot represent them)
    * C and M                            -> ``string``
    * D and T                            -> ``datetime64[ns]``
    * L                                  -> ``boolean``

    G and P fields are left out of the result.

    Parameters:
        filename: path of the dbf table, passed straight to ``dbf.Table``.

    Returns:
        A ``pandas.DataFrame`` with one column per retained field, in table
        order, and one row per record.

    Raises:
        KeyError: if the table contains a field type not listed above.
    """
    import dbf
    import numpy as np
    import pandas as pd
    from datetime import date, datetime, time

    dtype_by_field_type = {
        'B': 'float64',
        'C': 'string',
        'D': 'datetime64[ns]',
        'F': 'float64',
        'I': 'int64',
        'L': 'boolean',
        'M': 'string',
        'N': 'int64',
        'T': 'datetime64[ns]',
        'Y': 'float64',
    }

    names = []
    types = []
    table = dbf.Table(filename)
    for name in table.field_names:
        ftype, _, decimals, _ = table.field_info(name)
        ftype = chr(ftype)
        if ftype in 'GP':
            continue
        if ftype == 'N' and decimals:
            # Numeric fields with decimal places hold fractional values.
            ftype = 'F'
        names.append(name)
        types.append(dtype_by_field_type[ftype])

    series = [[] for _ in names]
    has_missing = [False] * len(names)
    with table:
        for rec in table:
            # Look values up by field name rather than by position: skipped
            # G/P fields would otherwise shift every later value into the
            # wrong column (or run past the end of `series`).
            for i, name in enumerate(names):
                value = rec[name]
                if value is None:
                    value = np.nan
                    has_missing[i] = True
                elif isinstance(value, date) and not isinstance(value, datetime):
                    # Promote plain dates to midnight datetimes. Real
                    # datetimes are excluded because datetime.combine()
                    # would discard their time of day.
                    value = datetime.combine(value, time())
                series[i].append(value)

    columns = {}
    for name, values, dtype, missing in zip(names, series, types, has_missing):
        if missing and dtype == 'int64':
            # NaN cannot be stored in int64; use the nullable integer dtype
            # only when it is actually needed so fully populated columns
            # keep their original dtype.
            dtype = 'Int64'
        columns[name] = pd.Series(values, dtype=dtype)
    return pd.DataFrame(columns)

Post a Comment for "How Do I Convert A .dbf File Into A Pandas DataFrame?"