mracs-configurations/scripts/nginxfmt.py

272 lines
9.1 KiB
Python

#!/usr/bin/env python3
# Copyright 2016 Michał Słomkowski
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This Python script formats nginx configuration files in a consistent way.

Originally published under https://github.com/1connect/nginx-config-formatter
"""
import argparse
import codecs
import re
# Module metadata.
__author__ = "Michał Słomkowski"
__license__ = "Apache 2.0"
__version__ = "1.0.2"

# Text prepended once per nesting level by perform_indentation(): four spaces.
INDENTATION = 4 * " "

# Placeholder tokens swapped in while formatting and swapped back afterwards.
# The VARIABLE pair stands in for the "${" and "}" of template variables; the
# BRACKET pair stands in for "{" and "}" that appear inside quoted strings.
TEMPLATE_VARIABLE_OPENING_TAG = "___TEMPLATE_VARIABLE_OPENING_TAG___"
TEMPLATE_VARIABLE_CLOSING_TAG = "___TEMPLATE_VARIABLE_CLOSING_TAG___"
TEMPLATE_BRACKET_OPENING_TAG = "___TEMPLATE_BRACKET_OPENING_TAG___"
TEMPLATE_BRACKET_CLOSING_TAG = "___TEMPLATE_BRACKET_CLOSING_TAG___"
def strip_line(single_line):
    """Trim a line and squeeze whitespace runs outside double quotes into one space.

    A line that starts with ``#`` once trimmed is returned trimmed but
    otherwise unchanged. Text between double quotes is kept verbatim.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return trimmed
    # Splitting on '"' leaves unquoted text at even indexes and quoted text at
    # odd indexes, so only the even-indexed segments are normalised.
    segments = trimmed.split('"')
    return '"'.join(
        segment if index % 2 else re.sub(r'[\s]+', ' ', segment)
        for index, segment in enumerate(segments)
    )
def count_multi_semicolon(single_line):
    """Report whether a line has quoted text and count its unquoted semicolons.

    Returns a ``(q, c)`` tuple. ``q`` is 1 when the trimmed line contains at
    least one double quote character and 0 otherwise. ``c`` is the number of
    ``;`` characters found outside double quotes. A line that starts with
    ``#`` once trimmed yields ``(0, 0)``.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return 0, 0
    # Even indexes hold text outside quotes; any further segment means a quote was seen.
    segments = trimmed.split('"')
    quoted_flag = 1 if len(segments) > 1 else 0
    semicolons = sum(segment.count(';') for segment in segments[::2])
    return quoted_flag, semicolons
def multi_semicolon(single_line):
    """Insert a newline after every semicolon that is outside double quotes.

    The line is trimmed first. A line that starts with ``#`` once trimmed is
    returned trimmed and otherwise unchanged; text between double quotes is
    never modified.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return trimmed
    segments = trimmed.split('"')
    # Only the even-indexed (unquoted) segments get their semicolons expanded.
    segments[::2] = [segment.replace(";", ";\n") for segment in segments[::2]]
    return '"'.join(segments)
def apply_variable_template_tags(line: str) -> str:
    """Swap each ``${name}`` in *line* for the opening tag, the name and the closing tag.

    Whitespace between the braces and the name is discarded. Hiding the braces
    behind tags keeps later bracket-based processing from treating them as
    block delimiters.
    """
    def _tagged(match):
        # Keep only the variable name between the two placeholder tags.
        return TEMPLATE_VARIABLE_OPENING_TAG + match.group(1) + TEMPLATE_VARIABLE_CLOSING_TAG

    return re.sub(r'\${\s*(\w+)\s*}', _tagged, line, flags=re.UNICODE)
def strip_variable_template_tags(line: str) -> str:
    """Turn each opening-tag / name / closing-tag sequence in *line* back into ``${name}``."""
    tagged_variable = re.compile(
        TEMPLATE_VARIABLE_OPENING_TAG + r'\s*(\w+)\s*' + TEMPLATE_VARIABLE_CLOSING_TAG,
        re.UNICODE,
    )
    return tagged_variable.sub(r'${\1}', line)
def apply_bracket_template_tags(content: str) -> str:
    """Replace ``{`` and ``}`` that occur inside quoted strings with placeholder tags.

    Brackets outside quotes are left alone, so later steps can treat every
    remaining bracket as a block delimiter.

    Quote handling:

    * A string is closed only by the same quote character that opened it, so
      an apostrophe inside ``"..."`` (or a double quote inside ``'...'``) does
      not end the string.
    * A quote preceded by an unescaped backslash does not open or close a
      string; ``\\\\`` is an escaped backslash and does not escape what follows.
    * Lines whose first non-blank character is ``#`` (while not inside a
      string) are copied verbatim, so an apostrophe in a comment cannot flip
      the quote state for the rest of the input.

    :param content: the whole configuration text.
    :return: the text with quoted brackets replaced by tags.
    """
    pieces = []
    active_quote = ""  # quote character that opened the current string, "" when outside
    escaped = False    # True when the previous character was an unescaped backslash

    for line in content.splitlines(keepends=True):
        if not active_quote and line.lstrip().startswith("#"):
            pieces.append(line)
            escaped = False
            continue

        for char in line:
            char_is_escaped = escaped
            escaped = char == "\\" and not char_is_escaped

            if char in ("'", '"') and not char_is_escaped:
                if not active_quote:
                    active_quote = char
                elif char == active_quote:
                    active_quote = ""
                pieces.append(char)
            elif active_quote and char == "{":
                pieces.append(TEMPLATE_BRACKET_OPENING_TAG)
            elif active_quote and char == "}":
                pieces.append(TEMPLATE_BRACKET_CLOSING_TAG)
            else:
                pieces.append(char)

    # Joining once avoids repeated string concatenation on large inputs.
    return "".join(pieces)
def reverse_in_quotes_status(status: bool) -> bool:
    """Return the opposite of *status*: ``False`` when it is truthy, ``True`` otherwise."""
    return not status
def strip_bracket_template_tags(content: str) -> str:
    """Turn every bracket placeholder tag in *content* back into ``{`` or ``}``."""
    substitutions = (
        (TEMPLATE_BRACKET_OPENING_TAG, "{"),
        (TEMPLATE_BRACKET_CLOSING_TAG, "}"),
    )
    for tag, bracket in substitutions:
        content = content.replace(tag, bracket)
    return content
def clean_lines(orig_lines) -> list:
    """Normalise lines and split them into one statement or bracket per line.

    Each input line is trimmed with ``strip_line`` and then handled as follows:

    * empty lines and comment lines (starting with ``#``) are kept as they are;
    * a line with more than one semicolon outside quotes is split after each
      such semicolon and the pieces are cleaned recursively;
    * a line starting with ``rewrite`` is kept whole;
    * any other line is split around ``{`` and ``}``, each bracket getting a
      line of its own.

    :param orig_lines: iterable of raw configuration lines.
    :return: list of cleaned lines.
    """
    cleaned_lines = []
    for raw_line in orig_lines:
        line = apply_variable_template_tags(strip_line(raw_line))

        if line == "":
            cleaned_lines.append("")
            continue
        if line.startswith("#"):
            cleaned_lines.append(strip_variable_template_tags(line))
            continue

        has_quotes, semicolons = count_multi_semicolon(line)
        if semicolons > 1:
            if has_quotes == 1:
                statements = multi_semicolon(line).splitlines()
            else:
                # Only pieces that were followed by ";" get it back. The tail
                # after the last semicolon (e.g. a closing "}") had none, and
                # appending one there would produce an invalid "};".
                *terminated, tail = line.split(";")
                statements = [part + ";" for part in terminated if part != ""]
                if tail != "":
                    statements.append(tail)
            cleaned_lines.extend(clean_lines(statements))
            continue

        if line.startswith("rewrite"):
            cleaned_lines.append(strip_variable_template_tags(line))
            continue

        for piece in re.split(r"([{}])", line):
            piece = strip_variable_template_tags(piece).strip()
            # Filter after stripping: whitespace between two brackets
            # ("{ }") must not turn into an extra blank line.
            if piece:
                cleaned_lines.append(piece)
    return cleaned_lines
def join_opening_bracket(lines):
    """Move an opening curly bracket that stands alone on its line to the end of the preceding line.

    A lone ``{`` on the very first line has no predecessor and is kept as is.
    """
    joined = []
    for current in lines:
        # `joined` is empty only while looking at the first line.
        if current == "{" and joined:
            joined[-1] += " {"
            continue
        joined.append(current)
    return joined
def perform_indentation(lines):
    """Prefix each line with indentation matching its curly-bracket nesting depth.

    A non-comment line ending in ``}`` lowers the depth before it is emitted
    (never below zero); a non-comment line ending in ``{`` raises the depth
    for the lines that follow. Empty lines stay empty.
    """
    result = []
    depth = 0
    for line in lines:
        is_comment = line.startswith("#")
        if depth > 0 and not is_comment and line.endswith('}'):
            depth -= 1
        if line == "":
            result.append("")
        else:
            result.append(depth * INDENTATION + line)
        if line.endswith('{') and not is_comment:
            depth += 1
    return result
def format_config_contents(contents):
    """Format a string holding nginx configuration and return the result.

    The text is cleaned line by line, opening brackets are joined to the
    preceding line, and lines are indented by nesting depth. Runs of three or
    more newlines are collapsed to three, a leading newline and trailing
    newlines matched by ``\\n$`` are removed, and exactly one newline is
    appended at the end.

    :param contents: nginx configuration text.
    :return: formatted configuration text ending with a newline.
    """
    contents = apply_bracket_template_tags(contents)
    lines = contents.splitlines()
    lines = clean_lines(lines)
    lines = join_opening_bracket(lines)
    lines = perform_indentation(lines)
    text = '\n'.join(lines)
    text = strip_bracket_template_tags(text)
    # No flags on purpose: "^" and "$" must anchor to the whole text, otherwise
    # every blank line would be removed. The fourth positional argument of
    # re.sub() is `count`, so passing re.MULTILINE there capped each pattern at
    # 8 replacements instead of setting a flag.
    for pattern, substitute in ((r'\n{3,}', '\n\n\n'), (r'^\n', ''), (r'\n$', '')):
        text = re.sub(pattern, substitute, text)
    return text + '\n'
def format_config_file(file_path, original_backup_file_path=None, verbose=True):
    """
    Performs the formatting on the given file. The function tries to detect file encoding first.

    The formatted text is computed and the optional backup is written before
    the original file is opened for writing, so a failure in either step
    leaves the original file untouched.

    :param file_path: path to original nginx configuration file. This file will be overwritten.
    :param original_backup_file_path: optional path, where original file will be backed up.
    :param verbose: show messages
    :raises Exception: when the file cannot be decoded with any of the tried encodings.
    """
    encodings = ('utf-8', 'latin1')
    encoding_failures = []
    chosen_encoding = None
    original_file_content = None
    for enc in encodings:
        try:
            with codecs.open(file_path, 'r', encoding=enc) as rfp:
                original_file_content = rfp.read()
        except ValueError as e:  # UnicodeDecodeError is a ValueError subclass
            encoding_failures.append(e)
        else:
            chosen_encoding = enc
            break

    if chosen_encoding is None:
        # Exceptions have no `.message` attribute on Python 3; use str().
        raise Exception('none of encodings %s are valid for file %s. Errors: %s'
                        % (encodings, file_path, [str(e) for e in encoding_failures]))

    # Format before touching any file: opening the target with mode 'w'
    # truncates it immediately.
    formatted_content = format_config_contents(original_file_content)

    if original_backup_file_path:
        with codecs.open(original_backup_file_path, 'w', encoding=chosen_encoding) as wfp:
            wfp.write(original_file_content)

    with codecs.open(file_path, 'w', encoding=chosen_encoding) as wfp:
        wfp.write(formatted_content)

    if verbose:
        print("Formatted file '%s' (detected encoding %s)." % (file_path, chosen_encoding))
        if original_backup_file_path:
            print("Original saved to '%s'." % original_backup_file_path)
if __name__ == "__main__":
    # Command-line entry point: format every file named on the command line in place.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-v", "--verbose", action="store_true", help="show formatted file names")
    parser.add_argument("-b", "--backup-original", action="store_true", help="backup original config file")
    parser.add_argument("config_files", nargs='+', help="configuration files to format")
    options = parser.parse_args()

    for target_path in options.config_files:
        # With -b the original is saved next to the file under the same name plus "~".
        if options.backup_original:
            backup_target = target_path + '~'
        else:
            backup_target = None
        format_config_file(target_path, backup_target, options.verbose)