Source code for eldam.utils.simapro

""" Functions used for SimaPro .xlsx files management """
import re
import json


# Marker written in SimaPro comments to separate the free text from the
# serialized JSON block that follows it.
_JSON_MARKER = '!!! DO NOT EDIT BELOW THIS LINE !!!'

# Start of the JSON block: the marker followed by the opening "[[".
# Any whitespace in between is accepted so that "\r\n" line endings or a
# stray space do not make the block unreadable.
_JSON_BLOCK_START = re.compile(r"!!! DO NOT EDIT BELOW THIS LINE !!!\s*\[\[")

# Header holding only the quality codes, e.g. "1BZ:", "AY" or "(3AZ)".
# The negative lookahead rejects a first line that is empty or a lone ":".
_QUALITY_CODES_HEADER = re.compile(
    r"^\(?(?! ?:?\n)"
    r"((?P<modification_code>[0123]) ?[-,;]? ?)?"
    r"((?P<relevance_code>[AB]) ?[-,;]? ?)?"
    r"(?P<confidence_code>[YZ])? ?:?\)? ?(\n|$)"
)

# Up to three "code: comment" lines (header already removed), always in the
# order modification, relevance, confidence. Every part is optional, so this
# pattern always matches (possibly the empty string).
_QUALITY_CODES_WITH_COMMENTS = re.compile(
    r"^((?P<modification_code>[0123])(?!\w) ?:?-? ?(?P<modification_comment>.*)(\n|$))?"
    r"((?P<relevance_code>[AB])(?!\w) ?:?-? ?(?P<relevance_comment>.*)(\n|$))?"
    r"((?P<confidence_code>[YZ])(?!\w) ?:?-? ?(?P<confidence_comment>.*)(\n|$))?"
)


def _split_json_block(comment):
    """Separates the free text of a comment from the data of its JSON block.

    Args:
        comment (str): Full comment text

    Returns:
        tuple: ``(text, json_data)`` where ``text`` is everything located before the marker
        and ``json_data`` is the deserialized block (an empty dict if there is no marker)

    Raises:
        ValueError: If the marker is present but is not followed by exactly one
            well-formed ``[[...]]`` block
    """
    if _JSON_MARKER not in comment:
        return comment, {}

    parts = _JSON_BLOCK_START.split(comment)
    if len(parts) != 2:
        raise ValueError(
            "Expected exactly one '[[...]]' JSON block after the {!r} marker, found {}".format(
                _JSON_MARKER, len(parts) - 1
            )
        )

    text, payload = parts

    # Trailing whitespace (typically a final newline) must not be mistaken for the closing "]]"
    payload = payload.rstrip()
    if not payload.endswith(']]'):
        raise ValueError("The JSON block following the {!r} marker must end with ']]'".format(_JSON_MARKER))

    return text, json.loads(payload[:-2])


def extract_data_from_comment(comment):
    r"""
    Extracts data from a comment. For example, quality data like process modification code and comment

    The comment may contain, in this order:

    - a header line with the quality codes (modification ``0-3``, relevance ``A/B``, confidence ``Y/Z``)
    - one line per code giving a comment about it
    - the free text of the comment
    - a ``!!! DO NOT EDIT BELOW THIS LINE !!!`` marker followed by a ``[[{...}]]`` JSON block

    Whitespace between the marker and ``[[`` and after ``]]`` is ignored.

    Note:
        Quality data are only looked for at the very beginning of the comment. A free text whose
        first line starts with a lone code character followed by a non word character
        (e.g. ``'A simple comment'`` or ``'1 kg of wheat'``) is therefore read as a quality code
        and its comment.

    Args:
        comment (str): Text containing the data. Other types are converted with ``str()``.

    Returns:
        dict: Dictionary containing every parsed data. Empty if ``comment`` is ``None``.

    Raises:
        ValueError: If the marker is present but the JSON block is missing or malformed

    Examples:
        >>> extract_data_from_comment('Comment on test co-product\n\n!!! DO NOT EDIT BELOW THIS LINE !!!'
        ...                           '\n[[{"review_state": 2, "reviewer_comment":"a reviewer comment",'
        ...                           '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'comment': 'Comment on test co-product', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}

        >>> extract_data_from_comment('1BZ:\n1: Information on process modification\nB: Info on relevance'
        ...                           '\nZ: Info on confidence\n\nRest of the comment\n\n!!! DO NOT EDIT BELOW THIS LINE !!!\n'
        ...                           '[[{"library": "Own process", "review_state": 2, "reviewer_comment": "a reviewer comment",'
        ...                           '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'modification_code': '1', 'relevance_code': 'B', 'confidence_code': 'Z', 'modification_comment': 'Information on process modification', 'relevance_comment': 'Info on relevance', 'confidence_comment': 'Info on confidence', 'comment': 'Rest of the comment', 'library': 'Own process', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}

        >>> extract_data_from_comment('0AZ:\nA: info on relevance (the line about modification has been '
        ...                           'skipped because code 0)\nZ: info on confidence\n\nrest of the comment\non 2 lines'
        ...                           '\n\n!!! DO NOT EDIT BELOW THIS LINE !!!\n[[{"data_source": "Source of the data", '
        ...                           '"library": "Own process", "review_state": 2,"reviewer_comment": "a reviewer comment", '
        ...                           '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'modification_code': '0', 'relevance_code': 'A', 'confidence_code': 'Z', 'relevance_comment': 'info on relevance (the line about modification has been skipped because code 0)', 'confidence_comment': 'info on confidence', 'comment': 'rest of the comment\non 2 lines', 'data_source': 'Source of the data', 'library': 'Own process', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}

        >>> extract_data_from_comment('Comment without acv cirad data\n\n!!! DO NOT EDIT BELOW THIS LINE !!!\n'
        ...                           '[[{"review_state": 2, "reviewer_comment": "a reviewer comment", '
        ...                           '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'comment': 'Comment without acv cirad data', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}

        >>> extract_data_from_comment('AY:\nA: info on relevance\nY: info on confidence')
        {'relevance_code': 'A', 'confidence_code': 'Y', 'relevance_comment': 'info on relevance', 'confidence_comment': 'info on confidence'}

        >>> extract_data_from_comment('Just a simple comment')
        {'comment': 'Just a simple comment'}

        >>> extract_data_from_comment('BY')
        {'relevance_code': 'B', 'confidence_code': 'Y'}

        >>> extract_data_from_comment('(3AZ)\n\nComments on quality data are missing')
        {'modification_code': '3', 'relevance_code': 'A', 'confidence_code': 'Z', 'comment': 'Comments on quality data are missing'}

        >>> extract_data_from_comment('Some text\r\n\r\n!!! DO NOT EDIT BELOW THIS LINE !!!\r\n[[{"review_state": 1}]]\r\n')
        {'comment': 'Some text', 'review_state': 1}
    """

    if comment is None:
        return {}

    # If JSON data are present, isolate them from the free text
    comment, json_data = _split_json_block(str(comment))

    # Extracting the potential quality codes header and removing it from the comment.
    # The pattern is anchored at the start of the string, so dropping the matched
    # prefix is all that is needed.
    header = _QUALITY_CODES_HEADER.match(comment)
    if header is None:
        result = {}
    else:
        result = header.groupdict()
        comment = comment[header.end():]

    # Extracting the "code: comment" lines. Codes found here take precedence over
    # the ones of the header.
    details = _QUALITY_CODES_WITH_COMMENTS.match(comment)
    result.update({key: value for key, value in details.groupdict().items() if value not in (None, '')})

    # Dropping the groups that did not capture anything
    result = {key: value for key, value in result.items() if value not in (None, '')}

    # What remains once the quality data are removed is the free text of the comment
    comment = comment[details.end():].strip()
    if comment:
        result['comment'] = comment

    # JSON data come last and override parsed values in case of key collision
    return {**result, **json_data}