代码之家  ›  专栏  ›  技术社区  ›  tftd

为什么这个正则表达式没有正确捕获最后一个输入?

  •  2
  • tftd  · 技术社区  · 11 月前

    我不明白我写的这个正则表达式出了什么问题。

    import re
    
    # Sample identifiers covering every combination of the optional parts
    inputs = [
        "exmpl-staging-1234-e2e-1707336983872",
        "exmpl-staging-1234-e2e-1706336983875",
        "exmpl-staging-main-e2e-1707336983878",
        "exmpl-demo-e2e-1707336983878",
        "exmpl-production-e2e-1707336983875",
        "exmpl-staging-2345",
        "exmpl-staging-1234",
        "exmpl-staging-1234-my-case-title",
        "exmpl-staging-1234-my-case-title-e2e-1707336983872"
    ]

    # Layout: exmpl-<type>[-<case>][-<title>][-e2e-<timestamp>]
    # The title group uses a greedy `.+`, so it can run to the end of the string.
    pattern = re.compile(
        r'^exmpl-(?P<type>main|staging|demo|production)'
        r'(?:-(?P<case>\d+|main))?'
        r'(?:-(?P<title>(?!e2e-\d+).+))?'
        r'(?:-e2e-(?P<timestamp>\d+))?$'
    )

    # One result row (dict) per input, kept in input order
    extracted_data = []

    for candidate in inputs:
        found = pattern.match(candidate)

        if found is None:
            # Nothing matched: every column except the input gets the same marker
            row = {'Input': candidate}
            for column in ('Type', 'Case', 'Title', 'Timestamp'):
                row[column] = 'No match found'
        else:
            # Optional groups that did not take part in the match become 'none'
            row = {
                'Input': candidate,
                'Type': found.group('type'),
                'Case': found.group('case') or 'none',
                'Title': found.group('title') or 'none',
                'Timestamp': found.group('timestamp') or 'none',
            }

        extracted_data.append(row)

    # Render the rows as a fixed-width, pipe-delimited table
    row_format = "| {:<50} | {:<10} | {:<7} | {:<36} | {:<15} |"
    print("| Input                                              | Type       | Case    | Title                                | Timestamp       |")
    print("|----------------------------------------------------|------------|---------|--------------------------------------|-----------------|")
    for row in extracted_data:
        print(row_format.format(row['Input'], row['Type'], row['Case'], row['Title'], row['Timestamp']))
    

    这是它提供的输出:

    | Input                                              | Type       | Case    | Title                                | Timestamp       |
    |----------------------------------------------------|------------|---------|--------------------------------------|-----------------|
    | exmpl-staging-1234-e2e-1707336983872               | staging    | 1234    | none                                 | 1707336983872   |
    | exmpl-staging-1234-e2e-1706336983875               | staging    | 1234    | none                                 | 1706336983875   |
    | exmpl-staging-main-e2e-1707336983878               | staging    | main    | none                                 | 1707336983878   |
    | exmpl-demo-e2e-1707336983878                       | demo       | none    | none                                 | 1707336983878   |
    | exmpl-production-e2e-1707336983875                 | production | none    | none                                 | 1707336983875   |
    | exmpl-staging-2345                                 | staging    | 2345    | none                                 | none            |
    | exmpl-staging-1234                                 | staging    | 1234    | none                                 | none            |
    | exmpl-staging-1234-my-case-title                   | staging    | 1234    | my-case-title                        | none            |
    | exmpl-staging-1234-my-case-title-e2e-1707336983872 | staging    | 1234    | my-case-title-e2e-1707336983872      | none            |
    

    在最后一个输入之前,它都按预期工作;但对于最后一个输入, timestamp 组是空的,时间戳被错误地捕获为 title 组的一部分。我在这里做错了什么?

    1 回复  |  直到 11 月前
        1
  •  2
  •   Andrej Kesely    11 月前

    将 title 组改为非贪婪匹配,即把 .+ 改为 .+? ( Regex101 )。贪婪的 .+ 会一直匹配到字符串末尾,而后面的 timestamp 组是可选的,所以时间戳被并入了 title :

    ^exmpl-(?P<type>main|staging|demo|production)(?:-(?P<case>\d+|main))?(?:-(?P<title>(?!e2e-\d+).+?))?(?:-e2e-(?P<timestamp>\d+))?$
    

    import re
    
    # Sample identifiers covering every combination of the optional parts
    inputs = [
        "exmpl-staging-1234-e2e-1707336983872",
        "exmpl-staging-1234-e2e-1706336983875",
        "exmpl-staging-main-e2e-1707336983878",
        "exmpl-demo-e2e-1707336983878",
        "exmpl-production-e2e-1707336983875",
        "exmpl-staging-2345",
        "exmpl-staging-1234",
        "exmpl-staging-1234-my-case-title",
        "exmpl-staging-1234-my-case-title-e2e-1707336983872",
    ]

    # Layout: exmpl-<type>[-<case>][-<title>][-e2e-<timestamp>]
    # The title group is lazy (`.+?`) so a trailing "-e2e-<digits>" suffix is
    # left for the timestamp group instead of being absorbed into the title.
    pattern = re.compile(r"^exmpl-(?P<type>main|staging|demo|production)(?:-(?P<case>\d+|main))?(?:-(?P<title>(?!e2e-\d+).+?))?(?:-e2e-(?P<timestamp>\d+))?$")

    # Column order shared by the result dicts and the printed table
    columns = ("Input", "Type", "Case", "Title", "Timestamp")

    # One result row (dict) per input, kept in input order
    extracted_data = []

    for text in inputs:
        hit = pattern.match(text)

        if not hit:
            # Nothing matched: every column except the input gets the same marker
            record = dict.fromkeys(columns, "No match found")
            record["Input"] = text
            extracted_data.append(record)
            continue

        # groupdict() maps non-participating optional groups to None,
        # which is reported as the string "none"
        parts = hit.groupdict()
        extracted_data.append(
            {
                "Input": text,
                "Type": parts["type"],
                "Case": parts["case"] or "none",
                "Title": parts["title"] or "none",
                "Timestamp": parts["timestamp"] or "none",
            }
        )

    # Render the rows as a fixed-width, pipe-delimited table
    header = "| Input                                              | Type       | Case    | Title                                | Timestamp       |"
    divider = "|----------------------------------------------------|------------|---------|--------------------------------------|-----------------|"
    line_format = "| {:<50} | {:<10} | {:<7} | {:<36} | {:<15} |"

    print(header)
    print(divider)
    for record in extracted_data:
        print(line_format.format(*(record[name] for name in columns)))
    

    打印:

    | Input                                              | Type       | Case    | Title                                | Timestamp       |
    |----------------------------------------------------|------------|---------|--------------------------------------|-----------------|
    | exmpl-staging-1234-e2e-1707336983872               | staging    | 1234    | none                                 | 1707336983872   |
    | exmpl-staging-1234-e2e-1706336983875               | staging    | 1234    | none                                 | 1706336983875   |
    | exmpl-staging-main-e2e-1707336983878               | staging    | main    | none                                 | 1707336983878   |
    | exmpl-demo-e2e-1707336983878                       | demo       | none    | none                                 | 1707336983878   |
    | exmpl-production-e2e-1707336983875                 | production | none    | none                                 | 1707336983875   |
    | exmpl-staging-2345                                 | staging    | 2345    | none                                 | none            |
    | exmpl-staging-1234                                 | staging    | 1234    | none                                 | none            |
    | exmpl-staging-1234-my-case-title                   | staging    | 1234    | my-case-title                        | none            |
    | exmpl-staging-1234-my-case-title-e2e-1707336983872 | staging    | 1234    | my-case-title                        | 1707336983872   |