# Source code for cf_pandas.vocab

"""Class for creating and working with vocabularies."""

import json
import pathlib
from collections import defaultdict
from typing import DefaultDict, Dict, Optional, Sequence, Union

from .utils import astype


class Vocab(object):
    """Class to handle vocabularies.

    A vocabulary is stored in ``self.vocab`` as a nested mapping of the form
    ``{nickname: {attribute: "expr1|expr2|..."}}``, where the innermost
    string is one or more regular expressions piped together.

    Parameters
    ----------
    openname: str, PurePath, optional
        If given, the vocabulary is initialized from this previously-saved
        JSON file (see `open_file`). Otherwise the vocabulary starts empty.
    """

    def __init__(self, openname: Optional[Union[str, pathlib.PurePath]] = None):
        self.vocab: DefaultDict[str, Dict[str, str]]
        if openname is not None:
            self.vocab = defaultdict(dict, self.open_file(openname))
        else:
            self.vocab = defaultdict(dict)

    def __repr__(self):
        """Representation: the repr of the vocabulary as a plain dict."""
        return dict(self.vocab).__repr__()

    def make_entry(
        self, nickname: str, expressions: Union[str, list], attr: str = "standard_name"
    ) -> "Vocab":
        """Make an entry for vocab.

        The entry is added in place to ``self.vocab``; if `nickname`/`attr`
        already has expressions, the new ones are piped onto the end.

        Parameters
        ----------
        nickname: str
            The nickname to call the variable being represented in this entry.
        expressions: str, list
            Regular expression(s) to use to select out the variable in a regex
            match. Multiple expressions input in a list are piped together to
            create one str of expressions.
        attr: str
            What attribute to identify the regular expressions with. Default is
            "standard_name", but other reasonable options are any variable
            attributes in a netcdf file such as "units", "name", and
            "long_name".

        Returns
        -------
        Vocab
            This same object, so calls can be chained.

        Examples
        --------
        The following creates an entry in the vocabulary stored in
        `vocab.vocab`. Because the Vocab object itself is returned, an
        interactive session echoes its representation:

        >>> import cf_pandas as cfp
        >>> vocab = cfp.Vocab()
        >>> vocab.make_entry("temp", ["a", "b"], attr="name")
        {'temp': {'name': 'a|b'}}
        """
        expressions = astype(expressions, list)
        entry: DefaultDict[str, Dict[str, str]] = defaultdict(dict)
        entry[nickname][attr] = "|".join(expressions)
        self.__iadd__(entry)
        return self

    def add(
        self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], method: str
    ) -> "Vocab":
        """Add two Vocab objects together... by adding their `.vocab`s together.

        Expressions are piped together (this object's first, then
        `other_vocab`'s). This is used for both `__add__` and `__iadd__`.

        Parameters
        ----------
        other_vocab: Vocab, dict
            Other Vocab object, or a ``{nickname: {attribute: expressions}}``
            mapping, to combine with. It is never modified.
        method : str
            Whether to run as "add" which returns a new Vocab object or "iadd"
            which adds to the original object.

        Returns
        -------
        Vocab
            vocab + other_vocab either as a new object or in place.

        Raises
        ------
        ValueError
            If `method` is neither "add" nor "iadd".
        """
        if isinstance(other_vocab, Vocab):
            other_vocab = other_vocab.vocab

        if method == "add":
            output = Vocab()
        elif method == "iadd":
            output = self
        else:
            raise ValueError(
                f"method must be 'add' or 'iadd', not {method!r}"
            )

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result (and hence repr / saved JSON) is deterministic. The key list
        # is materialized up front because `output` may be `self`.
        nicknames = list(dict.fromkeys([*self.vocab, *other_vocab]))

        for nickname in nicknames:
            # Use .get() rather than indexing: indexing a defaultdict would
            # insert empty entries into the operands as a side effect, and
            # indexing a plain dict would raise KeyError.
            mine = self.vocab.get(nickname, {})
            theirs = other_vocab.get(nickname, {})

            # gather all attributes under nickname to compare their expressions
            attributes = list(dict.fromkeys([*mine, *theirs]))

            # pipe together expressions for nickname-attribute pairs
            for attribute in attributes:
                new_expressions = (
                    mine.get(attribute, "") + "|" + theirs.get(attribute, "")
                ).strip("|")
                output.vocab[nickname][attribute] = new_expressions

        return output

    def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]):
        """vocab1 + vocab2: return a new Vocab, leaving both operands as is."""
        return self.add(other_vocab, "add")

    def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]):
        """vocab1 += vocab2: merge `other_vocab` into this object in place."""
        return self.add(other_vocab, "iadd")

    def __radd__(
        self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]
    ) -> "Vocab":
        """other + vocab, used when the left operand cannot do the addition.

        Delegates to `__add__`, so this object's expressions come first in
        the piped result.
        """
        return self.__add__(other_vocab)

    def save(self, savename: Union[str, pathlib.PurePath]):
        """Save to file.

        The vocabulary is written as JSON. The suffix of `savename` is set
        to ".json" (replacing any existing suffix).

        Parameters
        ----------
        savename: str, PurePath
            Filename to save to.
        """
        path = astype(savename, pathlib.PurePath).with_suffix(".json")
        # Context manager guarantees the file is closed even if dumping fails.
        with open(path, "w", encoding="utf-8") as outfile:
            json.dump(self.vocab, outfile)

    def open_file(self, openname: Union[str, pathlib.PurePath]):
        """Open previously-saved vocab.

        The suffix of `openname` is set to ".json" (replacing any existing
        suffix) before reading.

        Parameters
        ----------
        openname: str, PurePath
            Where to find vocab to open.

        Returns
        -------
        dict
            The decoded JSON content; expected to be a
            ``{nickname: {attribute: expressions}}`` mapping as written by
            `save`.
        """
        path = pathlib.PurePath(openname).with_suffix(".json")
        # Context manager closes the handle, which was previously leaked.
        with open(path, "r", encoding="utf-8") as infile:
            return json.load(infile)
def merge(vocabs: Sequence[Vocab]) -> Vocab:
    """Combine several Vocab objects into one.

    Starts from an empty Vocab and adds each input to it in turn with
    ``+=``, in the order given.

    Parameters
    ----------
    vocabs : Sequence[Vocab]
        The Vocab objects to combine.

    Returns
    -------
    Vocab
        A new Vocab object holding the combination of all inputs.
    """
    combined = Vocab()
    for item in vocabs:
        combined += item
    return combined