Scenario: Need to export data using RVTool for Private cloud DC migration to Azure/AWS Cloud. I have been asked to mask any sensitive information like host, hostnames, IP Addresses etc. Provide the information for scoping the migration work as the initial steps.
- To develop this, we need to make a tool to read all .xlsx files and read and print the names of each tabs.
import pandas as pd
import os
# Set the file path
file_path = 'C:\\Python312\\RVTool_masker\\'
# Function to read and print sheet names
def read_excel_files(file_path):
# Get all files in the directory
files = os.listdir(file_path)
# Loop through each file
for file in files:
# Check if the file is an Excel file
if file.endswith('.xlsx'):
print("File:", file)
# Read the Excel file
xls = pd.ExcelFile(os.path.join(file_path, file))
# Get sheet names
sheet_names = xls.sheet_names
# Print sheet names
print("Sheet names:")
for sheet_name in sheet_names:
print(sheet_name)
print("\n")
# Call the function
read_excel_files(file_path)
2. Now, use pandas to read and mask sensitive information in the files with *** and ###. use xlsxwriter to write to new excel files.
import pandas as pd
import os
# Set the file path
file_path = 'C:\\Python312\\RVTool_masker\\'
# Function to mask VM names and IP addresses and save to separate files
def mask_vm_names(file_path):
# Function to mask VM names
def mask_vm_name(vm_name):
if isinstance(vm_name, str) and len(vm_name) > 6:
return vm_name[:3] + '*' * (len(vm_name) - 6) + vm_name[-3:]
else:
return vm_name
# Function to mask IP addresses
def mask_ip_address(ip_address):
if isinstance(ip_address, str) and len(ip_address.split('.')) == 4:
octets = ip_address.split('.')
return octets[0] + '.' + octets[1] + '.###.###'
else:
return ip_address
# Get all files in the directory
files = os.listdir(file_path)
# Loop through each file
for file in files:
# Check if the file is an Excel file
if file.endswith('.xlsx'):
print("Processing file:", file)
# Read the Excel file
xls = pd.ExcelFile(os.path.join(file_path, file))
# Create a writer to save masked data
writer = pd.ExcelWriter(os.path.join(file_path, file.replace('.xlsx', '_masked.xlsx')), engine='xlsxwriter')
# Loop through each sheet
for sheet_name in xls.sheet_names:
# Read the sheet
df = pd.read_excel(xls, sheet_name)
# Check if specific columns exist, otherwise find the first string column
target_columns = ['VM', 'Name', 'Host', 'Port', 'Switch'] # Define your target columns here
vm_columns = [col for col in df.columns if any(target in col for target in target_columns)]
if not vm_columns:
for col in df.columns:
if df[col].dtype == 'object':
vm_columns = [col]
break
# Mask VM names
for vm_column in vm_columns:
df[vm_column] = df[vm_column].apply(mask_vm_name)
# Mask IP addresses
if 'Primary IP Address' in df.columns:
df['Primary IP Address'] = df['Primary IP Address'].apply(mask_ip_address)
# Write masked data to the writer
df.to_excel(writer, sheet_name=sheet_name, index=False)
# Save the masked data to a new file
writer.close()
print("Saved masked data to:", file.replace('.xlsx', '_masked.xlsx'))
# Call the function
mask_vm_names(file_path)
3. Reiterate to mask “Customer” colunm to replace with X’s
import pandas as pd
import os
# Set the file path
file_path = 'C:\\Python312\\RVTool_masker\\'
# Function to mask VM names, IP addresses, and customer names and save to separate files
def mask_vm_names(file_path):
# Function to mask VM names
def mask_vm_name(vm_name):
if isinstance(vm_name, str) and len(vm_name) > 6:
return vm_name[:3] + '*' * (len(vm_name) - 6) + vm_name[-3:]
else:
return vm_name
# Function to mask IP addresses
def mask_ip_address(ip_address):
if isinstance(ip_address, str) and len(ip_address.split('.')) == 4:
octets = ip_address.split('.')
return octets[0] + '.' + octets[1] + '.###.###'
else:
return ip_address
# Function to mask customer names
def mask_customer_name(customer_name):
if isinstance(customer_name, str) and len(customer_name) > 3:
return customer_name[:2] + 'X' * (len(customer_name) - 3) + customer_name[-1]
else:
return customer_name
# Get all files in the directory
files = os.listdir(file_path)
# Loop through each file
for file in files:
# Check if the file is an Excel file
if file.endswith('.xlsx'):
print("Processing file:", file)
# Read the Excel file
xls = pd.ExcelFile(os.path.join(file_path, file))
# Create a writer to save masked data
writer = pd.ExcelWriter(os.path.join(file_path, file.replace('.xlsx', '_masked.xlsx')), engine='xlsxwriter')
# Loop through each sheet
for sheet_name in xls.sheet_names:
# Read the sheet
df = pd.read_excel(xls, sheet_name)
# Check if specific columns exist, otherwise find the first string column
target_columns = ['VM', 'Name', 'Host', 'Port', 'Switch'] # Define your target columns here
vm_columns = [col for col in df.columns if any(target in col for target in target_columns)]
if not vm_columns:
for col in df.columns:
if df[col].dtype == 'object':
vm_columns = [col]
break
# Mask VM names
for vm_column in vm_columns:
df[vm_column] = df[vm_column].apply(mask_vm_name)
# Mask IP addresses
if 'Primary IP Address' in df.columns:
df['Primary IP Address'] = df['Primary IP Address'].apply(mask_ip_address)
# Mask customer names
if 'Customer' in df.columns:
df['Customer'] = df['Customer'].apply(mask_customer_name)
# Write masked data to the writer
df.to_excel(writer, sheet_name=sheet_name, index=False)
# Save the masked data to a new file
writer.close()
print("Saved masked data to:", file.replace('.xlsx', '_masked.xlsx'))
# Call the function
mask_vm_names(file_path)
Leave a comment