#!/usr/bin/env python3

"""
   Copyright 2016 Michał Słomkowski

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""

"""This Python script formats nginx configuration files in a consistent way.
Originally published under https://github.com/1connect/nginx-config-formatter
"""

import argparse
import codecs

import re

__author__ = "Michał Słomkowski"
__license__ = "Apache 2.0"
__version__ = "1.0.2"

INDENTATION = ' ' * 4

TEMPLATE_VARIABLE_OPENING_TAG = '___TEMPLATE_VARIABLE_OPENING_TAG___'
TEMPLATE_VARIABLE_CLOSING_TAG = '___TEMPLATE_VARIABLE_CLOSING_TAG___'

TEMPLATE_BRACKET_OPENING_TAG = '___TEMPLATE_BRACKET_OPENING_TAG___'
TEMPLATE_BRACKET_CLOSING_TAG = '___TEMPLATE_BRACKET_CLOSING_TAG___'


def strip_line(single_line):
    """Trim the line and squeeze whitespace runs into one space, leaving double-quoted text untouched.

    Lines whose first non-blank character is '#' are returned trimmed, with no further changes.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return trimmed

    # Splitting on the double quote yields alternating segments:
    # even positions are outside quotation marks, odd positions are inside.
    segments = trimmed.split('"')
    return '"'.join(
        segment if position % 2 else re.sub(r'[\s]+', ' ', segment)
        for position, segment in enumerate(segments)
    )


def count_multi_semicolon(single_line):
    """Report whether the line has quoted text and how many semicolons lie outside double quotes.

    Returns a pair ``(q, c)``: ``q`` is 1 when the line contains at least one double quote
    (0 otherwise) and ``c`` is the number of semicolons outside quotation marks.
    Comment lines yield ``(0, 0)``.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return 0, 0

    segments = trimmed.split('"')
    # More than one segment means a quote character was present on the line.
    has_quoted_part = 1 if len(segments) > 1 else 0
    # Every second segment, starting with the first, is outside quotation marks.
    unquoted_semicolons = sum(segment.count(';') for segment in segments[::2])
    return has_quoted_part, unquoted_semicolons


def multi_semicolon(single_line):
    """Insert a newline after every semicolon that lies outside double quotes.

    Comment lines are returned trimmed and otherwise unchanged.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return trimmed

    segments = trimmed.split('"')
    # Only the even-positioned segments are outside quotation marks; rewrite those in place.
    segments[::2] = [segment.replace(";", ";\n") for segment in segments[::2]]
    return '"'.join(segments)


def apply_variable_template_tags(line: str) -> str:
    """Swap the ``${`` and ``}`` markers around a variable name for placeholder tags.

    Whitespace between the markers and the name is dropped. The placeholders keep the
    curly brackets of variables out of the way of the bracket-based formatting steps.
    """
    tagged_variable = TEMPLATE_VARIABLE_OPENING_TAG + r"\1" + TEMPLATE_VARIABLE_CLOSING_TAG
    return re.sub(r'\${\s*(\w+)\s*}', tagged_variable, line, flags=re.UNICODE)


def strip_variable_template_tags(line: str) -> str:
    """Turn the variable placeholder tags back into ``${`` and ``}``."""
    tagged_variable_pattern = (
        TEMPLATE_VARIABLE_OPENING_TAG + r'\s*(\w+)\s*' + TEMPLATE_VARIABLE_CLOSING_TAG
    )
    return re.sub(tagged_variable_pattern, r'${\1}', line, flags=re.UNICODE)


def apply_bracket_template_tags(content: str) -> str:
    """Replace curly brackets found inside quoted strings with placeholder tags.

    Brackets within single- or double-quoted strings are not structural, so they are
    hidden behind tags before the bracket-based formatting steps run.

    A string is closed only by the same quote character that opened it, so an
    apostrophe inside a double-quoted string (or vice versa) does not end the string.
    Lines whose first non-blank character is '#' are copied verbatim when no string is
    open, so quotes inside comments cannot change the quoting state of later lines.

    :param content: text of the configuration, possibly spanning many lines.
    :return: the text with quoted ``{`` and ``}`` replaced by tags.
    """
    pieces = []
    open_quote = ""  # the quote character of the string being read, "" outside strings
    last_c = ""

    for line in content.splitlines(keepends=True):
        if not open_quote and line.lstrip().startswith("#"):
            # Full-line comment: keep as is and do not interpret quotes in it.
            pieces.append(line)
            last_c = line[-1]
            continue

        for c in line:
            # A quote preceded by a backslash is treated as escaped.
            if c in ("'", '"') and last_c != "\\":
                if not open_quote:
                    open_quote = c
                elif open_quote == c:
                    open_quote = ""
                # Otherwise it is the other quote character inside a string: literal.

            if open_quote and c == "{":
                pieces.append(TEMPLATE_BRACKET_OPENING_TAG)
            elif open_quote and c == "}":
                pieces.append(TEMPLATE_BRACKET_CLOSING_TAG)
            else:
                pieces.append(c)
            last_c = c

    # Joining once avoids repeated string concatenation for every character.
    return "".join(pieces)


def reverse_in_quotes_status(status: bool) -> bool:
    """Return the opposite of the given "inside quotes" flag."""
    return not status


def strip_bracket_template_tags(content: str) -> str:
    """Turn the bracket placeholder tags back into ``{`` and ``}``."""
    tag_to_bracket = (
        (TEMPLATE_BRACKET_OPENING_TAG, "{"),
        (TEMPLATE_BRACKET_CLOSING_TAG, "}"),
    )
    for tag, bracket in tag_to_bracket:
        # str.replace substitutes every occurrence by default.
        content = content.replace(tag, bracket)
    return content


def clean_lines(orig_lines) -> list:
    """Strip the lines and split them so every statement and curly bracket gets its own line.

    Each line is normalised with ``strip_line``. Empty lines and comment lines are kept
    as they are. A line holding more than one semicolon outside double quotes is broken
    into one line per statement and the pieces are cleaned recursively. Lines starting
    with ``rewrite`` are kept whole; any other line is split around ``{`` and ``}``.

    :param orig_lines: iterable of raw configuration lines.
    :return: list of cleaned lines.
    """
    cleaned_lines = []
    for line in orig_lines:
        line = strip_line(line)
        # Hide ${variable} brackets so they are not mistaken for block brackets below.
        line = apply_variable_template_tags(line)

        if line == "":
            cleaned_lines.append("")
            continue
        if line.startswith("#"):
            cleaned_lines.append(strip_variable_template_tags(line))
            continue

        q, c = count_multi_semicolon(line)
        if c > 1:
            if q == 1:
                # Quoted text present: split only at semicolons outside the quotes.
                statements = multi_semicolon(line).splitlines()
            else:
                *terminated, remainder = line.split(";")
                statements = [part + ";" for part in terminated if part != ""]
                # Whatever follows the last semicolon (e.g. a closing bracket or a
                # comment) was not terminated by one, so it must not receive one.
                if remainder != "":
                    statements.append(remainder)
            cleaned_lines.extend(clean_lines(statements))
        elif line.startswith("rewrite"):
            # rewrite directives are left unsplit, brackets included.
            cleaned_lines.append(strip_variable_template_tags(line))
        else:
            cleaned_lines.extend(
                [strip_variable_template_tags(part).strip()
                 for part in re.split(r"([{}])", line) if part != ""])
    return cleaned_lines


def join_opening_bracket(lines):
    """Attach an opening curly bracket that sits on its own line to the end of the preceding line.

    A lone bracket on the very first line has nothing to attach to and is kept as is.
    """
    merged = []
    for current in lines:
        # `merged` is empty only while handling the first line.
        if merged and current == "{":
            merged[-1] = merged[-1] + " {"
        else:
            merged.append(current)
    return merged


def perform_indentation(lines):
    """Prefix every non-empty line with indentation matching its curly-bracket nesting depth.

    A non-comment line ending with '}' is moved one level out before it is emitted (never
    below zero); a non-comment line ending with '{' moves the following lines one level in.
    """
    depth = 0
    result = []
    for text in lines:
        is_comment = text.startswith("#")

        if depth > 0 and text.endswith('}') and not is_comment:
            depth -= 1

        # Empty lines stay empty rather than receiving trailing whitespace.
        result.append(depth * INDENTATION + text if text != "" else "")

        if text.endswith('{') and not is_comment:
            depth += 1

    return result


def format_config_contents(contents):
    """Accept a string containing nginx configuration and return the formatted version.

    The text is split into lines, cleaned, re-joined around opening brackets and
    indented. Afterwards runs of three or more newlines are reduced to three, a leading
    newline and trailing newlines matched by ``\\n$`` are removed, and a single newline
    is appended.

    :param contents: the configuration text.
    :return: the formatted configuration text, ending with a newline.
    """
    contents = apply_bracket_template_tags(contents)
    lines = contents.splitlines()
    lines = clean_lines(lines)
    lines = join_opening_bracket(lines)
    lines = perform_indentation(lines)

    text = '\n'.join(lines)
    text = strip_bracket_template_tags(text)

    # No flag is passed on purpose: the fourth positional parameter of re.sub is
    # `count`, so passing re.MULTILINE there limited the number of replacements
    # instead of changing how the patterns match.
    for pattern, substitute in ((r'\n{3,}', '\n\n\n'), (r'^\n', ''), (r'\n$', '')):
        text = re.sub(pattern, substitute, text)

    return text + '\n'


def format_config_file(file_path, original_backup_file_path=None, verbose=True):
    """
    Performs the formatting on the given file. The function tries to detect file encoding first.

    The formatted text is computed and the optional backup is written before the
    original file is opened for writing, so a failure in either step leaves the
    original file untouched.

    :param file_path: path to original nginx configuration file. This file will be overridden.
    :param original_backup_file_path: optional path, where original file will be backed up.
    :param verbose: show messages
    :raises Exception: when the file cannot be decoded with any of the tried encodings.
    """
    encodings = ('utf-8', 'latin1')

    encoding_failures = []

    for enc in encodings:
        try:
            with codecs.open(file_path, 'r', encoding=enc) as rfp:
                original_file_content = rfp.read()
        except ValueError as e:
            encoding_failures.append(e)
        else:
            chosen_encoding = enc
            break
    else:
        # The loop ended without a successful read.
        raise Exception('none of encodings %s are valid for file %s. Errors: %s'
                        % (encodings, file_path, [str(e) for e in encoding_failures]))

    # Format before touching the file: opening it with 'w' truncates it immediately.
    formatted_content = format_config_contents(original_file_content)

    if original_backup_file_path:
        with codecs.open(original_backup_file_path, 'w', encoding=chosen_encoding) as wfp:
            wfp.write(original_file_content)

    with codecs.open(file_path, 'w', encoding=chosen_encoding) as wfp:
        wfp.write(formatted_content)

    if verbose:
        print("Formatted file '%s' (detected encoding %s)." % (file_path, chosen_encoding))
        if original_backup_file_path:
            print("Original saved to '%s'." % original_backup_file_path)


if __name__ == "__main__":
    # The description is spelled out here because the module's __doc__ is the first
    # string literal of the file, which is the license header rather than a summary.
    arg_parser = argparse.ArgumentParser(
        description="Formats nginx configuration files in a consistent way.")

    arg_parser.add_argument("-v", "--verbose", action="store_true", help="show formatted file names")
    arg_parser.add_argument("-b", "--backup-original", action="store_true", help="backup original config file")
    arg_parser.add_argument("config_files", nargs='+', help="configuration files to format")

    args = arg_parser.parse_args()

    for config_file_path in args.config_files:
        # The backup, when requested, is stored next to the original with a '~' suffix.
        backup_file_path = config_file_path + '~' if args.backup_original else None
        format_config_file(config_file_path, backup_file_path, args.verbose)