Problem
I have to add some extra logic to about 80 similar stored procedures on my DWH server.
I thought that it would take pretty much the same time if I do it with Python and lots of googling (I have no Python experience) or if I do it manually. So I decided to do it “the interesting way”.
My script works, but considering that Python can do cool things with minimal code, I’m sure that it can be shortened.
So here’s the script:
import pyodbc
import re
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=dwh-dev;DATABASE=STA;Trusted_Connection=Yes')
cursor = cnxn.cursor()
cursor.execute("select proc_schema = s.name, name = p.name, definition = object_definition(p.object_id) from sys.procedures p inner join sys.schemas s on s.schema_id = p.schema_id where 1=1 and s.name = 'xpert'")
rows = cursor.fetchall()
for row in rows:
try:
part1 = re.sub('(--$$proc_header_end$$rnrnt[-]*rnrn)', 'g<1>tif Config.[conf].[f_get_value_bit] ('STA', 'LoadFromProd_Xpert', 1) = 1rntbeginrnrntt<PLACEHOLDER>rnrntendrntelserntmybeginrnrn', row.definition)
part2 = re.sub('(--- standard log/error/transaction block --- footer)', 'myendrnrntg<1>', part1)
schemaName = row.proc_schema
tableName = row.name.replace('p_load_', '')
cursor1 = cnxn.cursor()
sql = "select c.name from sys.columns c inner join sys.tables t on t.object_id = c.object_id inner join sys.schemas s on t.schema_id = s.schema_id where s.name = '" + schemaName + "' and t.name = '" + tableName + "'"
cursor1.execute(sql)
columns = cursor1.fetchall()
columnDefinition = ''
firstIteration = 1
for column in columns:
if firstIteration == 1:
columnDefinition = column.name
firstIteration = 0
else:
columnDefinition = columnDefinition + 'rnttt,' + column.name
replaceString = 'truncate table ' + schemaName + '.' + tableName + ';rnrnttinsert into ' + schemaName + '.' + tableName + 'rntt(rnttt' + columnDefinition + 'rntt)rnttselectrnttt' + columnDefinition + 'rnttfrom [ZSTARS-DWH].STA.' + schemaName + '.' + tableName + ';'
almostFinal = part2.replace('<PLACEHOLDER>', replaceString)
oldLogic = re.findall('mybegin([sSwWdD]*)tmyend', almostFinal)
oldLogicIndented = 'begin' + oldLogic[0].replace('rn', 'rnt') + 'end'
final = re.sub('mybegin([sSwWdD]*)tmyend', oldLogicIndented, almostFinal)
text_file = open('UpdateScripts\' + schemaName + '.' + tableName + '.sql', 'w')
text_file.write(final.replace('create procedure', 'use STArngornalter procedure').replace('rn', 'n'))
text_file.close()
except:
print('Error editing ' + row.proc_schema + '.' + row.name)
and this is one example result definition from the first query before:
CREATE procedure xpert.p_load_t_afk1 (@BatchID uniqueidentifier = null)
as
begin
-- SET NOCOUNT ON added to prevent extra result sets from interfering with SELECT statements.
SET NOCOUNT ON;
--- standard log/error/transaction block --- header ----
--$$proc_header_start$$
SET XACT_ABORT ON;
DECLARE @CInfo ContextInformation;
INSERT INTO @CInfo EXECUTE log.p_proc_start @@ProcID, @BatchID OUT;
BEGIN TRY
--$$proc_header_end$$
-------------------------------------------------------
declare @execution_id bigint
EXEC [SSISDB].[catalog].[create_execution] @package_name=N't_afk1.dtsx', @execution_id=@execution_id OUTPUT, @folder_name=N'Projects', @project_name=N'AS400 Loads', @use32bitruntime=False, @reference_id=Null
EXEC [SSISDB].[catalog].[set_execution_parameter_value] @execution_id, @object_type=50, @parameter_name=N'LOGGING_LEVEL', @parameter_value=1
exec [SSISDB].[catalog].[set_execution_parameter_value] @execution_id, @object_type=50, @parameter_name=N'SYNCHRONIZED', @parameter_value=1
EXEC [SSISDB].[catalog].[start_execution] @execution_id
--- standard log/error/transaction block --- footer ----
EXECUTE log.p_proc_end @CInfo;
--$$proc_footer_start$$
END TRY
BEGIN CATCH
IF XACT_STATE() = 1 -- committable
COMMIT TRANSACTION;
IF XACT_STATE() = -1 -- uncommittable
ROLLBACK TRANSACTION;
EXECUTE log.p_catch_error @CInfo;
END CATCH
--$$proc_footer_end$$
--------------------------------------------------
end
and after:
CREATE procedure xpert.p_load_t_afk1 (@BatchID uniqueidentifier = null)
as
begin
-- SET NOCOUNT ON added to prevent extra result sets from interfering with SELECT statements.
SET NOCOUNT ON;
--- standard log/error/transaction block --- header ----
--$$proc_header_start$$
SET XACT_ABORT ON;
DECLARE @CInfo ContextInformation;
INSERT INTO @CInfo EXECUTE log.p_proc_start @@ProcID, @BatchID OUT;
BEGIN TRY
--$$proc_header_end$$
-------------------------------------------------------
if Config.[conf].[f_get_value_bit] ('STA', 'LoadFromProd_Xpert', 1) = 1
begin
truncate table xpert.t_afk1;
insert into xpert.t_afk1
(
K1FIRM
,K1WKNR
,K1AFNR
,K1AFDA
,K1KDDA
,K1RLDA
,K1AART
,K1SACH
,K1ABTG
,K1AKDN
,K1AVSN
,K1KDMC
,K1AREF
,K1RFDA
,K1WACD
,K1MWKZ
,K1PSLI
,K1RBLI
,K1EIVB
,K1VATA
,K1ZULA
,K1FKTK
,K1SARG
,K1FKAT
,K1PFKZ
,K1FKNR
,K1PJNR
,K1AWFW
,K1AWSW
,K1ARFW
,K1ARSW
,K1BOFW
,K1BOSW
,K1NEGW
,K1BRGW
,K1VOLU
,K1KA01
,K1KA02
,K1KA03
,K1KA04
,K1FS01
,K1FS02
,K1FS03
,K1FS04
,K1WC01
,K1WC02
,K1WC03
,K1WC04
,K1PM01
,K1PM02
,K1PM03
,K1PM04
,K1ERG1
,K1ERG2
,K1ERG3
,K1ERG4
,K1BA01
,K1BA02
,K1BA03
,K1BA04
,K1ERK1
,K1ERK2
,K1ERK3
,K1ERK4
,K1NER1
,K1NER2
,K1NER3
,K1NER4
,K1ABV1
,K1ABV2
,K1ABV3
,K1ABV4
,K1ABK1
,K1ABK2
,K1ABK3
,K1ABK4
,K1NEWT
,K1ST01
,K1ST02
,K1AUCD
,K1KURS
,K1ABDR
,K1ABDS
,K1ABKT
,K1ABNU
,K1ZAAB
,K1DBNR
,K1KKDN
,K1WBUS
,K1ABDV
,K1BED1
,K1BED2
,K1DFUR
,K1LSNU
,K1EUKZ
,K1VZTA
,K1SALV
,K1PJNA
,K1VORB
,K1VTN1
,K1VTN2
,K1ERV1
,K1ERV2
,K1VTL1
,K1VTL2
,K1UKZ1
,K1UKZ2
,K1UKZ3
,K1UKZ4
,K1UKZ5
,K1KDF1
,K1KDF2
,K1KDF3
,K1KDF4
,K1KDF5
,K1RHF1
,K1RHF2
,K1RHF3
,K1RHF4
,K1RHF5
,K1RHF6
,K1RHF7
,K1RHF8
,K1NEKZ
,K1ANDA
,K1ANUS
,K1ANUZ
,K1AEDA
,K1AETI
,K1AEUS
,K1USER
,K1WSID
)
select
K1FIRM
,K1WKNR
,K1AFNR
,K1AFDA
,K1KDDA
,K1RLDA
,K1AART
,K1SACH
,K1ABTG
,K1AKDN
,K1AVSN
,K1KDMC
,K1AREF
,K1RFDA
,K1WACD
,K1MWKZ
,K1PSLI
,K1RBLI
,K1EIVB
,K1VATA
,K1ZULA
,K1FKTK
,K1SARG
,K1FKAT
,K1PFKZ
,K1FKNR
,K1PJNR
,K1AWFW
,K1AWSW
,K1ARFW
,K1ARSW
,K1BOFW
,K1BOSW
,K1NEGW
,K1BRGW
,K1VOLU
,K1KA01
,K1KA02
,K1KA03
,K1KA04
,K1FS01
,K1FS02
,K1FS03
,K1FS04
,K1WC01
,K1WC02
,K1WC03
,K1WC04
,K1PM01
,K1PM02
,K1PM03
,K1PM04
,K1ERG1
,K1ERG2
,K1ERG3
,K1ERG4
,K1BA01
,K1BA02
,K1BA03
,K1BA04
,K1ERK1
,K1ERK2
,K1ERK3
,K1ERK4
,K1NER1
,K1NER2
,K1NER3
,K1NER4
,K1ABV1
,K1ABV2
,K1ABV3
,K1ABV4
,K1ABK1
,K1ABK2
,K1ABK3
,K1ABK4
,K1NEWT
,K1ST01
,K1ST02
,K1AUCD
,K1KURS
,K1ABDR
,K1ABDS
,K1ABKT
,K1ABNU
,K1ZAAB
,K1DBNR
,K1KKDN
,K1WBUS
,K1ABDV
,K1BED1
,K1BED2
,K1DFUR
,K1LSNU
,K1EUKZ
,K1VZTA
,K1SALV
,K1PJNA
,K1VORB
,K1VTN1
,K1VTN2
,K1ERV1
,K1ERV2
,K1VTL1
,K1VTL2
,K1UKZ1
,K1UKZ2
,K1UKZ3
,K1UKZ4
,K1UKZ5
,K1KDF1
,K1KDF2
,K1KDF3
,K1KDF4
,K1KDF5
,K1RHF1
,K1RHF2
,K1RHF3
,K1RHF4
,K1RHF5
,K1RHF6
,K1RHF7
,K1RHF8
,K1NEKZ
,K1ANDA
,K1ANUS
,K1ANUZ
,K1AEDA
,K1AETI
,K1AEUS
,K1USER
,K1WSID
from [ZSTARS-DWH].STA.xpert.t_afk1;
end
else
begin
declare @execution_id bigint
EXEC [SSISDB].[catalog].[create_execution] @package_name=N't_afk1.dtsx', @execution_id=@execution_id OUTPUT, @folder_name=N'Projects', @project_name=N'AS400 Loads', @use32bitruntime=False, @reference_id=Null
EXEC [SSISDB].[catalog].[set_execution_parameter_value] @execution_id, @object_type=50, @parameter_name=N'LOGGING_LEVEL', @parameter_value=1
exec [SSISDB].[catalog].[set_execution_parameter_value] @execution_id, @object_type=50, @parameter_name=N'SYNCHRONIZED', @parameter_value=1
EXEC [SSISDB].[catalog].[start_execution] @execution_id
end
--- standard log/error/transaction block --- footer ----
EXECUTE log.p_proc_end @CInfo;
--$$proc_footer_start$$
END TRY
BEGIN CATCH
IF XACT_STATE() = 1 -- committable
COMMIT TRANSACTION;
IF XACT_STATE() = -1 -- uncommittable
ROLLBACK TRANSACTION;
EXECUTE log.p_catch_error @CInfo;
END CATCH
--$$proc_footer_end$$
--------------------------------------------------
end
Solution
In the interest of keeping it ‘interesting’, clear and (finally) short, I have the following suggestions:
-
Consolidate the calls to your database: in total, you execute
n + 1
select
queries against your database (wheren
is the number of stored procedures; 80 in your question). In the example below, I have written a SQL query that can get all column information for each procedure at once (I’ll show you how you can use the results in the next suggestions):with base_query as ( select table_name = replace(p.name, 'p_load_', '') , procedure_name = p.name , definition = object_definition(p.object_id) from sys.procedures p inner join sys.schemas s on s.schema_id = p.schema_id where s.name = {schema_name} ) select column_name = c.name , table_name = s.table_name , procedure_name = s.procedure_name , definition = s.definition from sys.columns c inner join sys.tables t on t.object_id = c.object_id inner join base_query s on s.table_name = t.name
-
Use
.join(...)
for concatenation: the string.join(...)
method concatenates elements of a list (the argument tojoin
) with a separator (the string object). The 8 lines in your code starting withcolumnDefinition = ''
can be shortened to just one:columnDefinition = 'rnttt,'.join([column.name for column in columns])
I use a variant of this in the full example, below.
-
Use
.format(...)
to make string formatting clearer: This is a matter of taste and programming for flexibility. You use the+
operator a few times in your code to concatenate string literals with objects. By using.format(...)
, you can insert objects into strings and make it clear to anyone using your code what the object is intended to be formatted as (another nice bonus is that the linter won’t break up your string in your IDE). An example on how to use this is as follows (can replace your line definingreplaceString
)""" truncate table {schema}.{table};rnrnttinsert into {schema}.{table} rntt(rnttt{column}rntt)rnttselectrnttt{column}rnttfrom [ZSTARS-DWH].STA.{schema}.{table}; """.format(schema=schemaName, table=tableName, column=columnDefinition)
-
Using
with
when opening a file: Python has a great feature called ‘context managers’ which can help with the set up and tear down logic of an object. Whenwith
is used withopen(...)
, it automatically closes the file (file.close()
) when the file object goes out of scope. This is a nice / interesting feature; it won’t improve the performance of your code.with open('UpdateScripts\' + schemaName + '.' + tableName + '.sql', 'w') as text_file: text_file.write(final.replace('create procedure', 'use STArngornalter procedure').replace('rn', 'n'))
Bonus round (to keep it interesting)
-
Named tuples: really nice feature from the
collections
package that means that you can access a tuple using attribute names. I use them in the example below to group the result set from the consolidated query, and then iterate over the results. -
defaultdict: from the same package as named tuples, this lets you create a dict object which behaves as if it has a default value for keys that haven’t been set yet. I use it below to make grouping the results from the query a bit quicker.
-
re.sub(...)
can accept a function as its second argument (which in turn receives amatch
object as its single argument). In the example below, I use this to reduce the number of calls tore.sub(...)
and simplify the code for the 3 lines startingoldLogic
in your code.
Here’s the final version using all the suggestions above:
import pyodbc
import re
from collections import namedtuple, defaultdict
schema_name = 'xpert'
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=dwh-dev;DATABASE=STA;Trusted_Connection=Yes')
cursor = cnxn.cursor()
sql = """
with base_query as (
select
table_name = replace(p.name, 'p_load_', '')
, procedure_name = p.name
, definition = object_definition(p.object_id)
from sys.procedures p
inner join sys.schemas s
on s.schema_id = p.schema_id
where s.name = {schema_name}
)
select
column_name = c.name
, table_name = s.table_name
, procedure_name = s.procedure_name
, definition = s.definition
from sys.columns c
inner join sys.tables t
on t.object_id = c.object_id
inner join base_query s
on s.table_name = t.name
""".format(schema_name=schema_name)
cursor.execute(sql)
rows = cursor.fetchall()
Group = namedtuple('Group', ['table', 'procedure', 'definition'])
grouped_results = defaultdict(list)
for row in rows:
group = Group(row.table_name, row.procedure_name, row.definition)
grouped_results[group].append(row.column_name)
for group, column_names in grouped_results.items():
try:
columnDefinition = 'rnttt,'.join(column_names)
replaceString = """
g<1>tif Config.[conf].[f_get_value_bit] ('STA', 'LoadFromProd_Xpert', 1)
= 1rntbeginrnrntttruncate table {schema}.{table};rnrnttinsert into {schema}.{table}
rntt(rnttt{column}rntt)rnttselectrnttt{column}rnttfrom [ZSTARS-DWH].STA.{schema}.{table};
rnrntendrntelserntmybeginrnrn
""".format(
schema=schema_name,
table=group.table,
column=columnDefinition
)
result = re.sub('(--$$proc_header_end$$rnrnt[-]*rnrn)', replaceString, group.definition)
result = re.sub('(--- standard log/error/transaction block --- footer)', 'myendrnrntg<1>', result)
def repl(match):
updated = match.group(0).replace('rn', 'rnt')
return 'begin{updated}end'.format(updated=updated)
result = re.sub('mybegin([sSwWdD]*)tmyend', repl, result)
with open('UpdateScripts\' + schemaName + '.' + tableName + '.sql', 'w') as text_file:
text_file.write(result.replace('create procedure', 'use STArngornalter procedure').replace('rn', 'n'))
except:
print('Error editing ' + row.proc_schema + '.' + row.name)