Random String Generation module in Python

Posted on

Problem

The module is for facilitating random string generation. The example I use to explain it is the following:

Instead of writing

import random

# vars
age = random.randint(18, 20)

# categories
names = ('Mark', 'Sally')
adjs = ('wacky', 'cool')
# Weights are index-aligned with adjs and add up to 1: 'wacky' is the rare 1% case.
adjs_probability_map = (0.01, 0.99)
hellos = ('Hi!', 'Hey!', 'Good morning.')
hellos_probability_map = (0.5, 0.25, 0.25)

# calculation
name = random.choice(names)
# Compare strings by value: `is` tests object identity and only happens to
# work for interned literals.
if name == 'Mark':
    age += 2
adj = random.choices(adjs, adjs_probability_map)[0]
hello = random.choices(hellos, hellos_probability_map)[0]

# string
print(f'{hello} My name is {adj} {name}. I am {age} years old.')

# Example output: Hey! My name is cool Mark. I am 21 years old.

with this module you can write

import rrsg

# The template below declares one variable (the '&' line), two categories
# (the '$' lines, each followed by its items) and one output string (the '>'
# line). A {...} datatag after an item gives it a probability ({1%}) or a
# variable change ({age += 2}); [a | b | c] is an inline, unnamed category.
string = rrsg.generate("""
    &age = (18, 20)

    $name
    Mark {age += 2}
    Sally

    $adj
    wacky {1%}
    cool

    >[Hi! | Hey! | Good morning.] My name is $adj $name. I am &age years old.
""").strings()[0]  # strings() returns one generated string per '>' line; take the first

print(string)

which improves (at least that’s the purpose) the readability and, at the same time, makes it easy to have string templates in text files to use for random generation.

I’ve posted all the documentation on Github, and it might be useful to check it out to understand the generator syntax (those documents themselves might need some reviewing, as it is the first time I’m writing documentation).

Anyway, the code in __init__ is:

import re, random as rnd

class RandomGenerationError(Exception):
    """General exception for rrsg.

    The compiler raises it for faulty variable declarations and faulty
    datatags.
    """

    def __init__(self, message):
        # Explicit two-argument form of super(); forwards the message unchanged.
        super(RandomGenerationError, self).__init__(message)

class _Compiler:

    def __init__(self, generator):
        self.generator = generator

    def _compile_var(self, var_b):
        """
        var_b = 'var = 1'        TURNS INTO    var_c = (var, (1, 1))
        var_b = 'var = (3,4)'    TURNS INTO    var_c = (var, (3, 4))
        """

        var_name = re.search('.+(?==)', var_b).group()
        range_value, absolute_value = re.search('(?<==)((d+),(d+))', var_b), re.search('(?<==)((d+))', var_b)
        if range_value: # range declaration
            var_c = (var_name, tuple(int(num) for num in range_value.group(1, 2)))
        elif absolute_value: # absolute declaration
            var_c = (var_name, tuple(int(num) for num in absolute_value.group(1, 2)))
        else:
            raise RandomGenerationError('Faulty variable atribution.')
        return var_c

    def _compile_datatag(self, datatag_b):
        """
        datatag_b = '{75%,var+=2}'    TURNS INTO    datatag_c = {'prob': 0.75, 'var': ('+', 2)}
        """
        datatag_c = {}

        for prop in datatag_b[1:-1].split(','): # e.g. ['50%', 'var+=3']
            if '%' in prop: # e.g. 'prob': 0.5
                datatag_c['prob'] = int(re.search('[0-9]{1,2}?(?=%)', prop).group()) * 0.01
            elif '=' in prop: # e.g. 'var': ('+', 3)
                datatag_c[re.search('.*?(?=+=|-=|=)', prop).group()] = (re.search('+|-|=', prop).group(), int(re.search('(?<==)[0-9]+', prop).group()))
            else:
                raise RandomGenerationError('Faulty datatag.')

        return datatag_c

    def _compile_items(self, items_b):
        """
        items_b = ['a', 'b{75%,var+=2}']    TURNS INTO    items_c = {'a': {'prob': 0.25}, 'b': {'prob': 0.75, 'var': ('+', 2)}}
        """
        items_c = {}

        for item_b in items_b:
            datatag = re.search('{.*?}', item_b)
            if datatag:
                items_c[re.search('.+?(?={)', item_b).group()] = self._compile_datatag(datatag.group()) # e.g. b: {'prob': 0.75, 'var': ('+', 2)}
            else:
                items_c[item_b] = {} # e.g. a: {}

        #complete probabilities
        unassigned_prob = (1 - sum(datatag.get('prob', 0) for datatag in items_c.values())) / [datatag.get('prob') for datatag in items_c.values()].count(None)
        for item, datatag in items_c.items():
            if not datatag.get('prob'):
                items_c[item]['prob'] = unassigned_prob

        return items_c

    def _compile_string(self, string_b, categories_c, vars_c):
        """
        string_b = [Hi! | Hey! | Good morning.] My name is $adj $name. I am &age years old.
 TURNS INTO
        string_c = [
            {'Hi!': {'prob': 0.33}, 'Hey!': {'prob': 0.33}, 'Good morning.': {'prob': 0.33}},
            ' My name is ',
            {'cool': 'prob': 0.99, 'wacky': 'prob': 0.01},
            ' ',
            {'Sally': {'prob': 0.5}, 'Mark': {'prob': 0.5, 'age': ('+', 2)}},
            '. I am ',
            ('age', (18, 20)),
            ' years old.'
        ]
        """

        string_c = []

        while string_b:
            category, bitesized, var, text = re.search('(?<=$)[a-zA-Z]+', string_b), re.search('[.*?]', string_b), re.search('(?<=&)[a-zA-Z]+', string_b), re.search('[^&[]|$]+', string_b)
            if category and category.start() is 1:
                string_c += [categories_c[category.group()]]
                string_b = string_b.replace('$' + category.group(), '', 1)
            elif bitesized and bitesized.start() is 0:
                string_c += [self._compile_items(bitesized.group()[1:-1].split('|'))]
                string_b = string_b.replace(bitesized.group(), '', 1)
            elif var and var.start() is 1:
                string_c += [[var_c for var_c in vars_c if var_c[0] == var.group()][-1]]
                string_b = string_b.replace(f'&{var.group()}', '', 1)
            elif text:
                string_c += [text.group()]
                string_b = string_b.replace(text.group(), '', 1)

        return string_c

    def _compile(self):
        """
        Compiles a generator. Returns a list of compiled strings (strings_c).
        """

        vars, categories, strings, querying = [], {}, [], False

        for line in self.generator.split('n'):
            # remove comments
            line = re.sub('//.*', '', line)
            # remove unnecessary spaces
            # region
            line = line.lstrip(' ').rstrip(' ')                     # remove spaces before and after line
            line = re.sub('&(?: +)?', '&', line)                   # remove spaces after '&'
            line = re.sub('$(?: +)?', '$', line)                   # remove spaces after '$'
            line = re.sub('>(?: +)?', '>', line)                   # remove spaces after '>'
            line = re.sub('(?: +)?=(?: +)?', '=', line)             # remove spaces before and after '='
            line = re.sub('(?: +)?(?:+=)(?: +)?', '+=', line)      # remove spaces before and after '+='
            line = re.sub('(?: +)?(?:-=)(?: +)?', '-=', line)       # remove spaces before and after '-='
            line = re.sub('(?: +)?|(?: +)?', '|', line)            # remove spaces before and after '|'
            line = re.sub('(?: +)?,(?: +)?', ',', line)             # remove spaces before and after ','
            line = re.sub('[(?: +)?', '[', line)                   # remove spaces after '['
            line = re.sub('(?: +)?]', ']', line)                   # remove spaces before ']'
            line = re.sub('(?: +)?{(?: +)?', '{', line)            # remove spaces before and after '{'
            line = re.sub('(?: +)?}(?: +)?', '}', line)            # remove spaces before and after '}'
            # endregion
            # ignore empty lines
            if line is '': continue
            # iterpret line
            if line[0] is '&':
                # stop querying for category values and compile last category
                if querying:
                    querying = False
                    categories[category_name] = self._compile_items(categories[category_name])
                # create a new compiled var
                var_s = re.search('(?<=&).+', line).group()
                vars += [self._compile_var(var_s)]
            elif line[0] is '$':
                # start querying for category values and compile last category
                if querying:
                    categories[category_name] = self._compile_items(categories[category_name])
                else:
                    querying = True
                # add a new empty category
                category_name = re.search('(?<=$)[^ n]+', line).group()
                categories[category_name] = []
            elif line[0] is '>':
                # stop querying for category values and compile last category
                if querying:
                    querying = False
                    categories[category_name] = self._compile_items(categories[category_name])
                # add phrase to results
                string_b = re.search('(?<=>).+', line).group()
                strings += [self._compile_string(string_b, categories, vars)]
            elif querying:
                # add raw item to category being queried
                categories[category_name] += [line]

        return Compiled(strings)
    pass

class Compiled:
    """Compiled generator that can produce fresh random strings repeatedly.

    ``strings_c`` is a list of compiled strings. Each compiled string is a
    list whose entries are plain text (str), a category (dict mapping each
    item to its datatag, which holds at least 'prob') or a variable (tuple
    ``(name, (low, high))``).
    """

    # How a datatag operator combines the rolled value with its operand.
    _OPERATIONS = {
        '=': lambda current, operand: operand,
        '+': lambda current, operand: current + operand,
        '-': lambda current, operand: current - operand,
    }

    def __init__(self, strings_c):
        self.strings_c = strings_c

    def _generate_value(self, value_c):
        """Return a random integer between the two bounds, both included.

        value_c = (1,4)    TURNS INTO, E.G.    value_f = 3
        """
        return rnd.randint(*value_c)

    def _generate_item(self, category_c):
        """Pick one item of a category, weighted by each item's 'prob'.

        category_c = {'Sally': {'prob': 0.5}, 'Mark': {'prob': 0.5, 'age': ('+', 2)}}
        TURNS INTO, E.G.
        item_f = ('Mark', [('age', ('+', 2))])

        Returns the chosen item together with the list of variable changes
        from its datatag (every entry except 'prob').
        """
        candidates = list(category_c)
        weights = [datatag['prob'] for datatag in category_c.values()]
        item = rnd.choices(candidates, weights)[0]
        changes = [(key, change) for key, change in category_c[item].items() if key != 'prob']
        return (item, changes)

    def _generate_string(self, string_c):
        """Return one final string generated from a compiled string.

        string_c = [
            {'Hi!': {'prob': 0.33}, 'Hey!': {'prob': 0.33}, 'Good morning.': {'prob': 0.33}},
            ' My name is ',
            {'cool': {'prob': 0.99}, 'wacky': {'prob': 0.01}},
            ' ',
            {'Sally': {'prob': 0.5}, 'Mark': {'prob': 0.5, 'age': ('+', 2)}},
            '. I am ',
            ('age', (18, 20)),
            ' years old.'
        ]
        TURNS INTO, E.G.
        string_f = 'Hi! My name is cool Mark. I am 21 years old.'

        ``string_c`` is left untouched so the same compiled string can be
        generated from again with new random choices.
        """
        # Work on a shallow copy: overwriting the template in place would
        # freeze the first result for every later call.
        parts = list(string_c)
        var_changes = []

        # Resolve every category first, because an item chosen anywhere in
        # the string may change a variable that appears before it.
        for index, part in enumerate(parts):
            if isinstance(part, dict):
                item, changes = self._generate_item(part)
                parts[index] = item
                var_changes.extend(changes)

        # NOTE(review): each occurrence of a variable is rolled separately,
        # so the same variable used twice can show two different values.
        for index, part in enumerate(parts):
            if isinstance(part, tuple):
                var_name, bounds = part
                value = self._generate_value(bounds)
                for changed_name, (operator, operand) in var_changes:
                    if changed_name == var_name:
                        value = self._OPERATIONS[operator](value, operand)
                parts[index] = str(value)

        return ''.join(parts)

    def generate(self):
        """Return a Results object holding one final string per compiled string."""
        return Results([self._generate_string(string_c) for string_c in self.strings_c])

class Results:
    """Holder for the final strings produced by one generation run."""

    def __init__(self, strings_f):
        # Stored as given, without copying.
        self.strings_f = strings_f

    def strings(self):
        """Return the stored final strings (the same object that was passed in)."""
        final_strings = self.strings_f
        return final_strings

def compile(generator):
    """Compile generator text and return the resulting Compiled object.

    Call ``generate()`` on the returned object to obtain a Results object.
    """
    compiler = _Compiler(generator)
    return compiler._compile()

def generate(generator):
    """Compile generator text, generate from it once and return the Results object."""
    compiled = compile(generator)
    return compiled.generate()

I would like some notes and tips on how my code could be improved, as I am quite new to Python (tested the module on Python 3.7) and I have never tried this type of project.

Solution

1. Quick fixes in the first part

I wouldn’t do that:

# Parallel tuples: each *_probability_map has to stay index-aligned with the
# tuple of values it weights, which is easy to break when editing one of them.
names = ('Mark', 'Sally')
adjs = ('wacky', 'cool')
adjs_probability_map = (0.1, 0.99)
hellos = ('Hi!', 'Hey!', 'Good morning.')
hellos_probability_map = (0.5, 0.25, 0.25)
adj = random.choices(adjs, adjs_probability_map)[0]
hello = random.choices(hellos, hellos_probability_map)[0]

instead:

# random.choice() needs an indexable sequence, so names stays a tuple.
names = ('Mark', 'Sally')
# One dict per category (item -> weight) keeps each value next to its weight.
adjs = {'wacky': 0.1, 'cool': 0.99}
hellos = {'Hi!': 0.5, 'Hey!': 0.25, 'Good morning.': 0.25}
# random.choices() takes the population and the weights as two separate
# sequences: the dict's keys and its values, which iterate in the same order.
adj = random.choices(list(adjs), weights=list(adjs.values()))[0]
hello = random.choices(list(hellos), weights=list(hellos.values()))[0]

Also:

sum(adjs_probability_map) # 1.09

I don’t think that was intended

2. Problems in second part:

re.search('(?<==)((d+),(d+))', var_b), re.search('(?<==)((d+))', var_b)

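Your regular expressions are missing their backslashes (`\(`, `\d`, `\)`). Restore them, ideally inside raw strings (`r'...'`) so that Python passes the backslashes to `re` unchanged: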

re.search('(?<==)\((\d+),(\d+)\)', var_b), re.search('(?<==)((\d+))', var_b)

def compile(generator):
    """Returns a Compiled object useful when generating multiple times from the same generator."""
    # NOTE: defining this at module level hides the built-in compile() for the rest of the module.
    return _Compiler(generator)._compile()

compile is a built-in function; don't shadow it with your own function of the same name.


class RandomGenerationError(Exception):
    """General exception for rrsg"""

    # This override only forwards message to Exception.__init__.
    def __init__(self, message):
        super().__init__(message)

Equivalent to :

class RandomGenerationError(Exception):
    """General exception for rrsg.

    The docstring alone is a complete class body, so no ``pass`` is needed.
    """

Which can in turn be replaced by a built-in exception, because there are plenty of more descriptive built-in exceptions and you probably don't need to create more:

BaseException  
 +-- SystemExit  
 +-- KeyboardInterrupt  
 +-- GeneratorExit  
 +-- Exception  
      +-- StopIteration  
      +-- StopAsyncIteration  
      +-- ArithmeticError  
      |    +-- FloatingPointError  
      |    +-- OverflowError  
      |    +-- ZeroDivisionError  
      +-- AssertionError  
      +-- AttributeError  
      +-- BufferError  
      +-- EOFError  
      +-- ImportError  
      |    +-- ModuleNotFoundError  
      +-- LookupError  
      |    +-- IndexError  
      |    +-- KeyError  
      +-- MemoryError  
      +-- NameError  
      |    +-- UnboundLocalError  
      +-- OSError  
      |    +-- BlockingIOError  
      |    +-- ChildProcessError  
      |    +-- ConnectionError  
      |    |    +-- BrokenPipeError  
      |    |    +-- ConnectionAbortedError  
      |    |    +-- ConnectionRefusedError  
      |    |    +-- ConnectionResetError  
      |    +-- FileExistsError  
      |    +-- FileNotFoundError  
      |    +-- InterruptedError  
      |    +-- IsADirectoryError  
      |    +-- NotADirectoryError  
      |    +-- PermissionError  
      |    +-- ProcessLookupError  
      |    +-- TimeoutError  
      +-- ReferenceError  
      +-- RuntimeError  
      |    +-- NotImplementedError  
      |    +-- RecursionError  
      +-- SyntaxError  
      |    +-- IndentationError  
      |         +-- TabError  
      +-- SystemError  
      +-- TypeError  
      +-- ValueError  
      |    +-- UnicodeError  
      |         +-- UnicodeDecodeError  
      |         +-- UnicodeEncodeError  
      |         +-- UnicodeTranslateError  
      +-- Warning  
           +-- DeprecationWarning  
           +-- PendingDeprecationWarning  
           +-- RuntimeWarning  
           +-- SyntaxWarning  
           +-- UserWarning  
           +-- FutureWarning  
           +-- ImportWarning  
           +-- UnicodeWarning  
           +-- BytesWarning  
           +-- ResourceWarning

Also :

generate runs a function generate on the result of
compile
which calls _compile
on a _Compiler object which returned a Compiled object

That might make sense to you, but I doubt people will take the time to read your program if you name your variables like this.

Leave a Reply

Your email address will not be published. Required fields are marked *