"""
String-to-ascii and ascii-to-string convertion methods.
"""
import numpy as np
[docs]def str_to_ascii(input_str):
"""Converts a string to an ascii encoded numpy array.
Converts a single string of characters into a numpy array
coded as ascii.
Parameters
----------
input_str : str
String data.
Returns
-------
np.ndarray
Uni-dimensional array of char values encoded in ASCII format.
Examples
--------
Convert a string to numpy array.
>>> from dbcollection.utils.string_ascii import str_to_ascii
>>> str_to_ascii('string1')
array([115, 116, 114, 105, 110, 103, 49], dtype=uint8)
"""
return np.array([ord(c) for c in input_str], dtype=np.uint8)
[docs]def ascii_to_str(input_array):
"""Converts an ascii encoded numpy array to a string.
Parameters
----------
input_array : np.ndarray
Input array vector (should be of type dtype=numpy.uint8)
Returns
-------
str
Single string.
Examples
--------
Convert a numpy array to string.
>>> import numpy as np
>>> from dbcollection.utils.string_ascii import ascii_to_str
>>> ascii_to_str(np.array([115, 116, 114, 105, 110, 103, 49], dtype=uint8))
'string1'
"""
return "".join([chr(item) for item in input_array])
[docs]def convert_str_to_ascii(inp_str):
"""Convert a list of strings into an ascii encoded numpy array.
Converts a string or list of strings to a numpy array. The array size is
defined by the size of string plus one. This is needed for ascii to str
convertion in lua using ffi.string() which expects a 0 at the end of an
array.
If a list of strings is used, the size of the array is defined by the size
of the longest string (plus one), and zero padded to maitain the array
shape.
Parameters
----------
inp_str : str/list/tuple
String or list of strings to convert to an ascii array.
Returns
-------
np.ndarray
Single/multi-dimensional array of ASCII encoded strings.
Examples
--------
Example1: Convert a string to a numpy array encoded into ASCII values.
>>> from dbcollection.utils.string_ascii import convertstr_to_ascii
>>> convertstr_to_ascii('string1')
array([115, 116, 114, 105, 110, 103, 49, 0], dtype=uint8)
Example2: Convert a list of lists into an ASCII array.
>>> from dbcollection.utils.string_ascii import convertstr_to_ascii
>>> convertstr_to_ascii(['string1', 'string2', 'string3'])
array([[115, 116, 114, 105, 110, 103, 49, 0],
[115, 116, 114, 105, 110, 103, 50, 0],
[115, 116, 114, 105, 110, 103, 51, 0]], dtype=uint8)
"""
# check if list
if isinstance(inp_str, tuple):
inp_str = list(inp_str)
elif isinstance(inp_str, str):
inp_str = [inp_str]
# get max size of the list strings
max_size = max([len(a) for a in inp_str])
# allocate array
ascii_array = np.zeros([len(inp_str), max_size + 1], dtype=np.uint8)
# iteratively copy data to the array
for i, val in enumerate(inp_str):
ascii_array[i, :len(val)] = str_to_ascii(val)
if len(inp_str) > 1:
return ascii_array
else:
return ascii_array[0]
[docs]def convert_ascii_to_str(input_array):
"""Convert a numpy array to a string (or a list of strings)
Parameters
----------
input_array : np.ndarray
Array of strings encoded in ASCII format.
Returns
-------
str/list
String or list of strings.
Examples
--------
Convert a numpy array to a string.
>>> from dbcollection.utils.string_ascii import convert_ascii_to_str
>>> import numpy as np
>>> # ascii format of 'string1'
>>> tensor = np.array([[115, 116, 114, 105, 110, 103, 49, 0]], dtype=np.uint8)
>>> convert_ascii_to_str(tensor)
['string1']
"""
list_str = input_array.tolist()
if input_array.ndim > 1:
return [ascii_to_str(list(filter(lambda x: x > 0, str_))) for str_ in list_str]
else:
return ascii_to_str(list(filter(lambda x: x > 0, list_str)))