Transforming stored procedures using Python regular expressions

Posted on

Problem

I have to add some extra logic to about 80 similar stored procedures on my DWH server.

I thought that it would take pretty much the same time if I do it with Python and lots of googling (I have no Python experience) or if I do it manually. So I decided to do it “the interesting way”.

My script works, but considering that Python can do cool things with minimal code, I’m sure that it can be shortened.

So here’s the script:

import pyodbc
import re
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=dwh-dev;DATABASE=STA;Trusted_Connection=Yes')
cursor = cnxn.cursor()
cursor.execute("select proc_schema = s.name, name = p.name, definition = object_definition(p.object_id) from sys.procedures p inner join sys.schemas s on s.schema_id = p.schema_id where 1=1 and s.name = 'xpert'")
rows = cursor.fetchall()
for row in rows:
    try:
        part1 = re.sub('(--$$proc_header_end$$rnrnt[-]*rnrn)', 'g<1>tif Config.[conf].[f_get_value_bit] ('STA', 'LoadFromProd_Xpert', 1) = 1rntbeginrnrntt<PLACEHOLDER>rnrntendrntelserntmybeginrnrn', row.definition)
        part2 = re.sub('(--- standard log/error/transaction block  --- footer)', 'myendrnrntg<1>', part1)
        schemaName = row.proc_schema
        tableName = row.name.replace('p_load_', '')
        cursor1 = cnxn.cursor()
        sql = "select c.name from sys.columns c inner join sys.tables t on t.object_id = c.object_id inner join sys.schemas s on t.schema_id = s.schema_id where s.name = '" + schemaName + "' and t.name = '" + tableName + "'"
        cursor1.execute(sql)
        columns = cursor1.fetchall()
        columnDefinition = ''
        firstIteration = 1
        for column in columns:
            if firstIteration == 1:
                columnDefinition = column.name
                firstIteration = 0
            else:
                columnDefinition = columnDefinition + 'rnttt,' + column.name
        replaceString = 'truncate table ' + schemaName + '.' + tableName + ';rnrnttinsert into ' + schemaName + '.' + tableName + 'rntt(rnttt' + columnDefinition + 'rntt)rnttselectrnttt' + columnDefinition + 'rnttfrom [ZSTARS-DWH].STA.' + schemaName + '.' + tableName + ';'
        almostFinal = part2.replace('<PLACEHOLDER>', replaceString)
        oldLogic = re.findall('mybegin([sSwWdD]*)tmyend', almostFinal)
        oldLogicIndented = 'begin' + oldLogic[0].replace('rn', 'rnt') + 'end'
        final = re.sub('mybegin([sSwWdD]*)tmyend', oldLogicIndented, almostFinal)
        text_file = open('UpdateScripts\' + schemaName + '.' + tableName + '.sql', 'w')
        text_file.write(final.replace('create procedure', 'use STArngornalter procedure').replace('rn', 'n'))
        text_file.close()
    except:
        print('Error editing ' + row.proc_schema + '.' + row.name)

and this is one example result definition from the first query before:

CREATE procedure xpert.p_load_t_afk1 (@BatchID uniqueidentifier = null)
as
begin

    -- SET NOCOUNT ON added to prevent extra result sets from interfering with SELECT statements.
        SET NOCOUNT ON;

    --- standard log/error/transaction block  --- header ----
        --$$proc_header_start$$
        SET XACT_ABORT ON;
        DECLARE @CInfo ContextInformation;
        INSERT INTO @CInfo EXECUTE log.p_proc_start @@ProcID, @BatchID OUT;
        BEGIN TRY
        --$$proc_header_end$$

    -------------------------------------------------------

    declare @execution_id bigint
    EXEC [SSISDB].[catalog].[create_execution] @package_name=N't_afk1.dtsx', @execution_id=@execution_id OUTPUT, @folder_name=N'Projects', @project_name=N'AS400 Loads', @use32bitruntime=False, @reference_id=Null
    EXEC [SSISDB].[catalog].[set_execution_parameter_value] @execution_id,  @object_type=50, @parameter_name=N'LOGGING_LEVEL', @parameter_value=1
    exec [SSISDB].[catalog].[set_execution_parameter_value] @execution_id,  @object_type=50, @parameter_name=N'SYNCHRONIZED', @parameter_value=1
    EXEC [SSISDB].[catalog].[start_execution] @execution_id

    --- standard log/error/transaction block  --- footer ----

        EXECUTE log.p_proc_end @CInfo;

        --$$proc_footer_start$$
        END TRY
        BEGIN CATCH
            IF XACT_STATE() =  1 -- committable
                COMMIT TRANSACTION;

            IF XACT_STATE() = -1 -- uncommittable
                ROLLBACK TRANSACTION;

            EXECUTE log.p_catch_error @CInfo;

        END CATCH
        --$$proc_footer_end$$
    --------------------------------------------------
end

and after:

CREATE procedure xpert.p_load_t_afk1 (@BatchID uniqueidentifier = null)
as
begin

    -- SET NOCOUNT ON added to prevent extra result sets from interfering with SELECT statements.
        SET NOCOUNT ON;

    --- standard log/error/transaction block  --- header ----
        --$$proc_header_start$$
        SET XACT_ABORT ON;
        DECLARE @CInfo ContextInformation;
        INSERT INTO @CInfo EXECUTE log.p_proc_start @@ProcID, @BatchID OUT;
        BEGIN TRY
        --$$proc_header_end$$

    -------------------------------------------------------

    if Config.[conf].[f_get_value_bit] ('STA', 'LoadFromProd_Xpert', 1) = 1
    begin

        truncate table xpert.t_afk1;

        insert into xpert.t_afk1
        (
            K1FIRM
            ,K1WKNR
            ,K1AFNR
            ,K1AFDA
            ,K1KDDA
            ,K1RLDA
            ,K1AART
            ,K1SACH
            ,K1ABTG
            ,K1AKDN
            ,K1AVSN
            ,K1KDMC
            ,K1AREF
            ,K1RFDA
            ,K1WACD
            ,K1MWKZ
            ,K1PSLI
            ,K1RBLI
            ,K1EIVB
            ,K1VATA
            ,K1ZULA
            ,K1FKTK
            ,K1SARG
            ,K1FKAT
            ,K1PFKZ
            ,K1FKNR
            ,K1PJNR
            ,K1AWFW
            ,K1AWSW
            ,K1ARFW
            ,K1ARSW
            ,K1BOFW
            ,K1BOSW
            ,K1NEGW
            ,K1BRGW
            ,K1VOLU
            ,K1KA01
            ,K1KA02
            ,K1KA03
            ,K1KA04
            ,K1FS01
            ,K1FS02
            ,K1FS03
            ,K1FS04
            ,K1WC01
            ,K1WC02
            ,K1WC03
            ,K1WC04
            ,K1PM01
            ,K1PM02
            ,K1PM03
            ,K1PM04
            ,K1ERG1
            ,K1ERG2
            ,K1ERG3
            ,K1ERG4
            ,K1BA01
            ,K1BA02
            ,K1BA03
            ,K1BA04
            ,K1ERK1
            ,K1ERK2
            ,K1ERK3
            ,K1ERK4
            ,K1NER1
            ,K1NER2
            ,K1NER3
            ,K1NER4
            ,K1ABV1
            ,K1ABV2
            ,K1ABV3
            ,K1ABV4
            ,K1ABK1
            ,K1ABK2
            ,K1ABK3
            ,K1ABK4
            ,K1NEWT
            ,K1ST01
            ,K1ST02
            ,K1AUCD
            ,K1KURS
            ,K1ABDR
            ,K1ABDS
            ,K1ABKT
            ,K1ABNU
            ,K1ZAAB
            ,K1DBNR
            ,K1KKDN
            ,K1WBUS
            ,K1ABDV
            ,K1BED1
            ,K1BED2
            ,K1DFUR
            ,K1LSNU
            ,K1EUKZ
            ,K1VZTA
            ,K1SALV
            ,K1PJNA
            ,K1VORB
            ,K1VTN1
            ,K1VTN2
            ,K1ERV1
            ,K1ERV2
            ,K1VTL1
            ,K1VTL2
            ,K1UKZ1
            ,K1UKZ2
            ,K1UKZ3
            ,K1UKZ4
            ,K1UKZ5
            ,K1KDF1
            ,K1KDF2
            ,K1KDF3
            ,K1KDF4
            ,K1KDF5
            ,K1RHF1
            ,K1RHF2
            ,K1RHF3
            ,K1RHF4
            ,K1RHF5
            ,K1RHF6
            ,K1RHF7
            ,K1RHF8
            ,K1NEKZ
            ,K1ANDA
            ,K1ANUS
            ,K1ANUZ
            ,K1AEDA
            ,K1AETI
            ,K1AEUS
            ,K1USER
            ,K1WSID
        )
        select
            K1FIRM
            ,K1WKNR
            ,K1AFNR
            ,K1AFDA
            ,K1KDDA
            ,K1RLDA
            ,K1AART
            ,K1SACH
            ,K1ABTG
            ,K1AKDN
            ,K1AVSN
            ,K1KDMC
            ,K1AREF
            ,K1RFDA
            ,K1WACD
            ,K1MWKZ
            ,K1PSLI
            ,K1RBLI
            ,K1EIVB
            ,K1VATA
            ,K1ZULA
            ,K1FKTK
            ,K1SARG
            ,K1FKAT
            ,K1PFKZ
            ,K1FKNR
            ,K1PJNR
            ,K1AWFW
            ,K1AWSW
            ,K1ARFW
            ,K1ARSW
            ,K1BOFW
            ,K1BOSW
            ,K1NEGW
            ,K1BRGW
            ,K1VOLU
            ,K1KA01
            ,K1KA02
            ,K1KA03
            ,K1KA04
            ,K1FS01
            ,K1FS02
            ,K1FS03
            ,K1FS04
            ,K1WC01
            ,K1WC02
            ,K1WC03
            ,K1WC04
            ,K1PM01
            ,K1PM02
            ,K1PM03
            ,K1PM04
            ,K1ERG1
            ,K1ERG2
            ,K1ERG3
            ,K1ERG4
            ,K1BA01
            ,K1BA02
            ,K1BA03
            ,K1BA04
            ,K1ERK1
            ,K1ERK2
            ,K1ERK3
            ,K1ERK4
            ,K1NER1
            ,K1NER2
            ,K1NER3
            ,K1NER4
            ,K1ABV1
            ,K1ABV2
            ,K1ABV3
            ,K1ABV4
            ,K1ABK1
            ,K1ABK2
            ,K1ABK3
            ,K1ABK4
            ,K1NEWT
            ,K1ST01
            ,K1ST02
            ,K1AUCD
            ,K1KURS
            ,K1ABDR
            ,K1ABDS
            ,K1ABKT
            ,K1ABNU
            ,K1ZAAB
            ,K1DBNR
            ,K1KKDN
            ,K1WBUS
            ,K1ABDV
            ,K1BED1
            ,K1BED2
            ,K1DFUR
            ,K1LSNU
            ,K1EUKZ
            ,K1VZTA
            ,K1SALV
            ,K1PJNA
            ,K1VORB
            ,K1VTN1
            ,K1VTN2
            ,K1ERV1
            ,K1ERV2
            ,K1VTL1
            ,K1VTL2
            ,K1UKZ1
            ,K1UKZ2
            ,K1UKZ3
            ,K1UKZ4
            ,K1UKZ5
            ,K1KDF1
            ,K1KDF2
            ,K1KDF3
            ,K1KDF4
            ,K1KDF5
            ,K1RHF1
            ,K1RHF2
            ,K1RHF3
            ,K1RHF4
            ,K1RHF5
            ,K1RHF6
            ,K1RHF7
            ,K1RHF8
            ,K1NEKZ
            ,K1ANDA
            ,K1ANUS
            ,K1ANUZ
            ,K1AEDA
            ,K1AETI
            ,K1AEUS
            ,K1USER
            ,K1WSID
        from [ZSTARS-DWH].STA.xpert.t_afk1;

    end
    else
    begin

        declare @execution_id bigint
        EXEC [SSISDB].[catalog].[create_execution] @package_name=N't_afk1.dtsx', @execution_id=@execution_id OUTPUT, @folder_name=N'Projects', @project_name=N'AS400 Loads', @use32bitruntime=False, @reference_id=Null
        EXEC [SSISDB].[catalog].[set_execution_parameter_value] @execution_id,  @object_type=50, @parameter_name=N'LOGGING_LEVEL', @parameter_value=1
        exec [SSISDB].[catalog].[set_execution_parameter_value] @execution_id,  @object_type=50, @parameter_name=N'SYNCHRONIZED', @parameter_value=1
        EXEC [SSISDB].[catalog].[start_execution] @execution_id

    end

    --- standard log/error/transaction block  --- footer ----

        EXECUTE log.p_proc_end @CInfo;

        --$$proc_footer_start$$
        END TRY
        BEGIN CATCH
            IF XACT_STATE() =  1 -- committable
                COMMIT TRANSACTION;

            IF XACT_STATE() = -1 -- uncommittable
                ROLLBACK TRANSACTION;

            EXECUTE log.p_catch_error @CInfo;

        END CATCH
        --$$proc_footer_end$$
    --------------------------------------------------
end

Solution

In the interest of keeping it ‘interesting’, clear and (finally) short, I have the following suggestions:

  1. Consolidate the calls to your database: in total, you execute n + 1 select queries against your database (where n is the number of stored procedures; 80 in your question). In the example below, I have written a SQL query that can get all column information for each procedure at once (I’ll show you how you can use the results in the next suggestions):

    with base_query as (
        select 
            table_name = replace(p.name, 'p_load_', '')
            , procedure_name = p.name
            , definition = object_definition(p.object_id)
        from sys.procedures p 
        inner join sys.schemas s 
            on s.schema_id = p.schema_id 
        where s.name = {schema_name}
    )
    select
        column_name = c.name
        , table_name = s.table_name
        , procedure_name = s.procedure_name
        , definition = s.definition
    from sys.columns c 
    inner join sys.tables t 
        on t.object_id = c.object_id 
    inner join base_query s 
        on s.table_name = t.name
    
  2. Use .join(...) for concatenation: the string .join(...) method concatenates elements of a list (the argument to join) with a separator (the string object). The 8 lines in your code starting with columnDefinition = '' can be shortened to just one:

    columnDefinition = 'rnttt,'.join([column.name for column in columns])
    

    I use a variant of this in the full example, below.

  3. Use .format(...) to make string formatting clearer: This is a matter of taste and programming for flexibility. You use the + operator a few times in your code to concatenate string literals with objects. By using .format(...), you can insert objects into strings and make it clear to anyone using your code what the object is intended to be formatted as (another nice bonus is that the linter won’t break up your string in your IDE). An example on how to use this is as follows (can replace your line defining replaceString)

    """ truncate table {schema}.{table};rnrnttinsert into {schema}.{table}
    rntt(rnttt{column}rntt)rnttselectrnttt{column}rnttfrom 
    [ZSTARS-DWH].STA.{schema}.{table};
    """.format(schema=schemaName, table=tableName, column=columnDefinition)
    
  4. Using with when opening a file: Python has a great feature called ‘context managers’ which can help with the set up and tear down logic of an object. When with is used with open(...), it automatically closes the file (file.close()) when the file object goes out of scope. This is a nice / interesting feature; it won’t improve the performance of your code.

    with open('UpdateScripts\' + schemaName + '.' + tableName + '.sql', 'w') as text_file:
        text_file.write(final.replace('create procedure', 'use STArngornalter procedure').replace('rn', 'n'))
    

Bonus round (to keep it interesting)

  1. Named tuples: really nice feature from the collections package that means that you can access a tuple using attribute names. I use them in the example below to group the result set from the consolidated query, and then iterate over the results.

  2. defaultdict: from the same package as named tuples, this lets you create a dict object which behaves as if it has a default value for keys that haven’t been set yet. I use it below to make grouping the results from the query a bit quicker.

  3. re.sub(...) can accept a function as its second argument (which in turn receives a match object as its single argument). In the example below, I use this to reduce the number of calls to re.sub(...) and simplify the code for the 3 lines starting oldLogic in your code.

Here’s the final version using all the suggestions above:

import pyodbc
import re
from collections import namedtuple, defaultdict

schema_name = 'xpert'

cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=dwh-dev;DATABASE=STA;Trusted_Connection=Yes')
cursor = cnxn.cursor()
sql = """
    with base_query as (
        select 
            table_name = replace(p.name, 'p_load_', '')
            , procedure_name = p.name
            , definition = object_definition(p.object_id)
        from sys.procedures p 
        inner join sys.schemas s 
            on s.schema_id = p.schema_id 
        where s.name = {schema_name}
    )
    select
        column_name = c.name
        , table_name = s.table_name
        , procedure_name = s.procedure_name
        , definition = s.definition
    from sys.columns c 
    inner join sys.tables t 
        on t.object_id = c.object_id 
    inner join base_query s 
        on s.table_name = t.name
""".format(schema_name=schema_name)
cursor.execute(sql)
rows = cursor.fetchall()

Group = namedtuple('Group', ['table', 'procedure', 'definition'])
grouped_results = defaultdict(list)
for row in rows:
    group = Group(row.table_name, row.procedure_name, row.definition)
    grouped_results[group].append(row.column_name)

for group, column_names in grouped_results.items():
    try:
        columnDefinition = 'rnttt,'.join(column_names)
        replaceString = """
            g<1>tif Config.[conf].[f_get_value_bit] ('STA', 'LoadFromProd_Xpert', 1)
            = 1rntbeginrnrntttruncate table {schema}.{table};rnrnttinsert into {schema}.{table}
            rntt(rnttt{column}rntt)rnttselectrnttt{column}rnttfrom [ZSTARS-DWH].STA.{schema}.{table};
            rnrntendrntelserntmybeginrnrn
        """.format(
            schema=schema_name, 
            table=group.table, 
            column=columnDefinition
        )

        result = re.sub('(--$$proc_header_end$$rnrnt[-]*rnrn)', replaceString, group.definition)
        result = re.sub('(--- standard log/error/transaction block  --- footer)', 'myendrnrntg<1>', result)

        def repl(match):
            updated = match.group(0).replace('rn', 'rnt')
            return 'begin{updated}end'.format(updated=updated)

        result = re.sub('mybegin([sSwWdD]*)tmyend', repl, result)
        with open('UpdateScripts\' + schemaName + '.' + tableName + '.sql', 'w') as text_file:
            text_file.write(result.replace('create procedure', 'use STArngornalter procedure').replace('rn', 'n'))

    except:
        print('Error editing ' + row.proc_schema + '.' + row.name)

Leave a Reply

Your email address will not be published. Required fields are marked *