Source code for eldam.utils.simapro

""" Functions used for SimaPro .xlsx files management """
import re
import json


def extract_data_from_comment(comment):
    r"""
    Extracts the data embedded in a SimaPro comment.

    Three kinds of data are looked for, in this order:

    1. A JSON payload stored after the ``!!! DO NOT EDIT BELOW THIS LINE !!!``
       marker, usually wrapped in ``[[`` ... ``]]``.
    2. A quality codes header on the first line (e.g. ``1BZ:``, ``(3AZ)``, ``BY``).
    3. One line per quality code giving its comment (e.g. ``B: Info on relevance``).

    Whatever text remains is returned under the ``comment`` key. Keys whose value
    is missing or empty are omitted. Keys of the JSON payload override parsed keys
    of the same name.

    Args:
        comment (str): Text containing the data. ``None`` gives an empty dict, any
            other object is converted with ``str()``.

    Returns:
       dict: Dictionary containing every parsed data

    Raises:
        ValueError: If the text following the marker is not valid JSON
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).

    Examples:
        >>> extract_data_from_comment('Comment on test co-product\n\n!!! DO NOT EDIT BELOW THIS LINE !!!'
        ...         '\n[[{"review_state": 2, "reviewer_comment":"a reviewer comment",'
        ...         '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'comment': 'Comment on test co-product', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}
        >>> extract_data_from_comment('1BZ:\n1: Information on process modification\nB: Info on relevance'
        ...         '\nZ: Info on confidence\n\nRest of the comment\n\n!!! DO NOT EDIT BELOW THIS LINE !!!\n'
        ...         '[[{"library": "Own process", "review_state": 2, "reviewer_comment": "a reviewer comment",'
        ...         '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'modification_code': '1', 'relevance_code': 'B', 'confidence_code': 'Z', 'modification_comment': 'Information on process modification', 'relevance_comment': 'Info on relevance', 'confidence_comment': 'Info on confidence', 'comment': 'Rest of the comment', 'library': 'Own process', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}
        >>> extract_data_from_comment('0AZ:\nA: info on relevance (the line about modification has been '
        ...         'skipped because code 0)\nZ: info on confidence\n\nrest of the comment\non 2 lines'
        ...         '\n\n!!! DO NOT EDIT BELOW THIS LINE !!!\n[[{"data_source": "Source of the data", '
        ...         '"library": "Own process", "review_state": 2,"reviewer_comment": "a reviewer comment", '
        ...         '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'modification_code': '0', 'relevance_code': 'A', 'confidence_code': 'Z', 'relevance_comment': 'info on relevance (the line about modification has been skipped because code 0)', 'confidence_comment': 'info on confidence', 'comment': 'rest of the comment\non 2 lines', 'data_source': 'Source of the data', 'library': 'Own process', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}
        >>> extract_data_from_comment('Comment without acv cirad data\n\n!!! DO NOT EDIT BELOW THIS LINE !!!\n'
        ...         '[[{"review_state": 2, "reviewer_comment": "a reviewer comment", '
        ...         '"comment_for_reviewer": "a comment for reviewer"}]]')
        {'comment': 'Comment without acv cirad data', 'review_state': 2, 'reviewer_comment': 'a reviewer comment', 'comment_for_reviewer': 'a comment for reviewer'}
        >>> extract_data_from_comment('AY:\nA: info on relevance\nY: info on confidence')
        {'relevance_code': 'A', 'confidence_code': 'Y', 'relevance_comment': 'info on relevance', 'confidence_comment': 'info on confidence'}
        >>> extract_data_from_comment('Just a simple comment')
        {'comment': 'Just a simple comment'}
        >>> extract_data_from_comment('BY')
        {'relevance_code': 'B', 'confidence_code': 'Y'}
        >>> extract_data_from_comment('(3AZ)\n\nComments on quality data are missing')
        {'modification_code': '3', 'relevance_code': 'A', 'confidence_code': 'Z', 'comment': 'Comments on quality data are missing'}
    """

    if comment is None:
        return dict()

    # Isolating the JSON data (if any) from the free text
    comment, json_data = _split_embedded_json(str(comment))

    # Defining regexps
    # Finding if the comment contains a header with quality codes
    find_quality_codes_header = r"^\(?(?! ?:?\n)((?P<modification_code>[0123]) ?[-,;]? ?)?((?P<relevance_code>[AB]) ?[-,;]? ?)?(?P<confidence_code>[YZ])? ?:?\)? ?(\n|$)"

    # Finding quality codes and comments (with header removed)
    find_quality_codes_with_comments = r"^((?P<modification_code>[0123])(?!\w) ?:?-? ?(?P<modification_comment>.*)(\n|$))?((?P<relevance_code>[AB])(?!\w) ?:?-? ?(?P<relevance_comment>.*)(\n|$))?((?P<confidence_code>[YZ])(?!\w) ?:?-? ?(?P<confidence_comment>.*)(\n|$))?"

    # Extracting potential quality data header and removing it from the comment.
    # Both patterns are anchored with ^ and used without re.MULTILINE, so they can
    # only match at position 0: slicing at the end of the match removes it.
    result = dict()
    header = re.match(find_quality_codes_header, comment)
    if header is not None:
        result.update(header.groupdict())
        comment = comment[header.end():]

    # Extracting quality data code+comment. Every group of this pattern is optional,
    # so it always matches (possibly an empty string).
    details = re.match(find_quality_codes_with_comments, comment)
    result.update({key: value for key, value in details.groupdict().items() if value not in (None, '')})
    comment = comment[details.end():]

    # Dropping the codes and comments that were not found
    result = {key: value for key, value in result.items() if value not in (None, '')}

    # What is left is the free comment; it is only kept if not empty
    comment = comment.strip()
    if comment:
        result['comment'] = comment

    # JSON data are merged last so that they take precedence over parsed values
    return {**result, **json_data}


def _split_embedded_json(comment):
    """
    Separates the free text of a comment from the JSON data stored after the marker.

    Args:
        comment (str): Full text of the comment

    Returns:
        tuple: (text before the marker, unserialized JSON data). The JSON data is an
        empty dict if the marker is absent or if nothing follows it.

    Raises:
        ValueError: If the text following the marker is not valid JSON
    """
    marker = '!!! DO NOT EDIT BELOW THIS LINE !!!'

    text, found, payload = comment.partition(marker)
    if not found:
        return comment, {}

    # Tolerating any line ending after the marker and trailing whitespace after
    # the closing brackets
    payload = payload.strip()

    # Stripping the surrounding [[ ]]
    if payload.startswith('[['):
        payload = payload[2:]
    if payload.endswith(']]'):
        payload = payload[:-2]

    if not payload.strip():
        return text, {}

    # Unserializing json data
    return text, json.loads(payload)