""" Validate JSON format. Licensed under MIT Copyright (c) 2012-2015 Isaac Muse """ import re import codecs import json RE_LINE_PRESERVE = re.compile(r"\r?\n", re.MULTILINE) RE_COMMENT = re.compile( r'''(?x) (?P /\*[^*]*\*+(?:[^/*][^*]*\*+)*/ # multi-line comments | [ \t]*//(?:[^\r\n])* # single line comments ) | (?P "(?:\\.|[^"\\])*" # double quotes | .[^/"']* # everything else ) ''', re.DOTALL ) RE_TRAILING_COMMA = re.compile( r'''(?x) ( (?P , # trailing comma (?P[\s\r\n]*) # white space (?P\]) # bracket ) | (?P , # trailing comma (?P[\s\r\n]*) # white space (?P\}) # bracket ) ) | (?P "(?:\\.|[^"\\])*" # double quoted string | .[^,"']* # everything else ) ''', re.DOTALL ) RE_LINE_INDENT_TAB = re.compile(r'^(?:(\t+)?(?:(/\*)|[^ \t\r\n])[^\r\n]*)?\r?\n$') RE_LINE_INDENT_SPACE = re.compile(r'^(?:((?: {4})+)?(?:(/\*)|[^ \t\r\n])[^\r\n]*)?\r?\n$') RE_TRAILING_SPACES = re.compile(r'^.*?[ \t]+\r?\n?$') RE_COMMENT_END = re.compile(r'\*/') PATTERN_COMMENT_INDENT_SPACE = r'^(%s *?[^\t\r\n][^\r\n]*)?\r?\n$' PATTERN_COMMENT_INDENT_TAB = r'^(%s[ \t]*[^ \t\r\n][^\r\n]*)?\r?\n$' E_MALFORMED = "E0" E_COMMENTS = "E1" E_COMMA = "E2" W_NL_START = "W1" W_NL_END = "W2" W_INDENT = "W3" W_TRAILING_SPACE = "W4" W_COMMENT_INDENT = "W5" VIOLATION_MSG = { E_MALFORMED: 'JSON content is malformed.', E_COMMENTS: 'Comments are not part of the JSON spec.', E_COMMA: 'Dangling comma found.', W_NL_START: 'Unnecessary newlines at the start of file.', W_NL_END: 'Missing a new line at the end of the file.', W_INDENT: 'Indentation Error.', W_TRAILING_SPACE: 'Trailing whitespace.', W_COMMENT_INDENT: 'Comment Indentation Error.' } class CheckJsonFormat(object): """ Test JSON for format irregularities. - Trailing spaces. - Inconsistent indentation. - New lines at end of file. - Unnecessary newlines at start of file. - Trailing commas. - Malformed JSON. """ def __init__(self, use_tabs=False, allow_comments=False): """Setup the settings.""" self.use_tabs = use_tabs self.allow_comments = allow_comments self.fail = False def index_lines(self, text): """Index the char range of each line.""" self.line_range = [] count = 1 last = 0 for m in re.finditer('\n', text): self.line_range.append((last, m.end(0) - 1, count)) last = m.end(0) count += 1 def get_line(self, pt): """Get the line from char index.""" line = None for r in self.line_range: if pt >= r[0] and pt <= r[1]: line = r[2] break return line def check_comments(self, text): """ Check for JavaScript comments. Log them and strip them out so we can continue. """ def remove_comments(group): return ''.join([x[0] for x in RE_LINE_PRESERVE.findall(group)]) def evaluate(m): text = '' g = m.groupdict() if g["code"] is None: if not self.allow_comments: self.log_failure(E_COMMENTS, self.get_line(m.start(0))) text = remove_comments(g["comments"]) else: text = g["code"] return text content = ''.join(map(lambda m: evaluate(m), RE_COMMENT.finditer(text))) return content def check_dangling_commas(self, text): """ Check for dangling commas. Log them and strip them out so we can continue. """ def check_comma(g, m, line): # ,] -> ] or ,} -> } self.log_failure(E_COMMA, line) if g["square_comma"] is not None: return g["square_ws"] + g["square_bracket"] else: return g["curly_ws"] + g["curly_bracket"] def evaluate(m): g = m.groupdict() return check_comma(g, m, self.get_line(m.start(0))) if g["code"] is None else g["code"] return ''.join(map(lambda m: evaluate(m), RE_TRAILING_COMMA.finditer(text))) def log_failure(self, code, line=None): """ Log failure. Log failure code, line number (if available) and message. """ if line: print("%s: Line %d - %s" % (code, line, VIOLATION_MSG[code])) else: print("%s: %s" % (code, VIOLATION_MSG[code])) self.fail = True def check_format(self, file_name): """Initiate the check.""" self.fail = False comment_align = None with codecs.open(file_name, encoding='utf-8') as f: count = 1 for line in f: indent_match = (RE_LINE_INDENT_TAB if self.use_tabs else RE_LINE_INDENT_SPACE).match(line) end_comment = ( (comment_align is not None or (indent_match and indent_match.group(2))) and RE_COMMENT_END.search(line) ) # Don't allow empty lines at file start. if count == 1 and line.strip() == '': self.log_failure(W_NL_START, count) # Line must end in new line if not line.endswith('\n'): self.log_failure(W_NL_END, count) # Trailing spaces if RE_TRAILING_SPACES.match(line): self.log_failure(W_TRAILING_SPACE, count) # Handle block comment content indentation if comment_align is not None: if comment_align.match(line) is None: self.log_failure(W_COMMENT_INDENT, count) if end_comment: comment_align = None # Handle general indentation elif indent_match is None: self.log_failure(W_INDENT, count) # Enter into block comment elif comment_align is None and indent_match.group(2): alignment = indent_match.group(1) if indent_match.group(1) is not None else "" if not end_comment: comment_align = re.compile( (PATTERN_COMMENT_INDENT_TAB if self.use_tabs else PATTERN_COMMENT_INDENT_SPACE) % alignment ) count += 1 f.seek(0) text = f.read() self.index_lines(text) text = self.check_comments(text) self.index_lines(text) text = self.check_dangling_commas(text) try: json.loads(text) except Exception as e: self.log_failure(E_MALFORMED) print(e) return self.fail if __name__ == "__main__": import sys cjf = CheckJsonFormat(False, True) cjf.check_format(sys.argv[1])