虽然这并没有直接回答我关于用正则表达式解析 HTML/XML 时遇到的问题,但下面的做法确实解决了我的问题。
所以,我从XSLT调用Java函数。
Java代码:
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts nutrient values from an HTML document with a single regular expression.
 *
 * <p>Usage: call {@link #process(String)} first; if it returns {@code true}, the
 * {@code get...} methods return the captured text for the most recent document.
 *
 * <p>For every label in {@link #NUTRIENT_LABELS} the expression captures the contents of
 * the next two {@code <SPAN>} elements that follow the label. Labels must occur in the
 * listed order. Matching is case-sensitive, so only upper-case {@code SPAN} tags match.
 *
 * <p>The last match is kept in a static field, so the class is not safe for concurrent use
 * and every call to {@link #process(String)} replaces the previous result.
 */
public class NutrientValues {

    /** Labels searched for, in the order in which they must appear in the document. */
    private static final String[] NUTRIENT_LABELS = {
        "Energy", "Fat", "Saturates", "Carbohydrate", "Sugars", "Fibre", "Protein", "Salt"
    };

    /** Lazily skips to the next two SPAN elements and captures the contents of each. */
    private static final String TWO_SPAN_VALUES =
        ".*?<SPAN.*?>(.*?)<\\/SPAN>.*?<SPAN.*?>(.*?)<\\/SPAN>";

    /**
     * Compiled once. DOTALL lets {@code .} cross line terminators so markup spread over
     * several lines still matches; MULTILINE is retained from the original flags although
     * the expression uses neither {@code ^} nor {@code $}.
     */
    private static final Pattern PATTERN =
        Pattern.compile(buildRegex(), Pattern.MULTILINE | Pattern.DOTALL);

    /** Matcher for the document most recently passed to {@link #process(String)}. */
    private static Matcher matcher;

    /**
     * Builds the full expression: each label followed by two captured SPAN values, with
     * consecutive label sections separated by a lazy wildcard.
     */
    private static String buildRegex() {
        StringBuilder regex = new StringBuilder();
        for (int i = 0; i < NUTRIENT_LABELS.length; i++) {
            if (i > 0) {
                regex.append(".*?");
            }
            regex.append(NUTRIENT_LABELS[i]).append(TWO_SPAN_VALUES);
        }
        return regex.toString();
    }

    /**
     * Searches {@code htmldoc} for the nutrient labels and their values.
     *
     * @param htmldoc the markup to search
     * @return {@code true} if all labels and their two values were found
     */
    public static boolean process(String htmldoc) {
        matcher = PATTERN.matcher(htmldoc);
        return matcher.find();
    }

    /**
     * Returns one capturing group of the last match.
     *
     * @param index 1-based capturing group number
     * @return the captured text
     * @throws IllegalStateException if {@link #process(String)} has not been called, or if
     *     its last call found no match
     */
    private static String group(int index) {
        if (matcher == null) {
            throw new IllegalStateException(
                "process(String) must be called before reading nutrient values");
        }
        return matcher.group(index);
    }

    /** @return contents of the first SPAN after the "Energy" label (group 1) */
    public static String getEnergyPer100() {
        return group(1);
    }

    /** @return contents of the second SPAN after the "Energy" label (group 2) */
    public static String getEnergyPerServ() {
        return group(2);
    }

    /** @return contents of the first SPAN after the "Fat" label (group 3) */
    public static String getFatPer100() {
        return group(3);
    }

    /** @return contents of the second SPAN after the "Fat" label (group 4) */
    public static String getFatPerServ() {
        return group(4);
    }

    /** @return contents of the first SPAN after the "Saturates" label (group 5) */
    public static String getSaturatesPer100() {
        return group(5);
    }

    /** @return contents of the second SPAN after the "Saturates" label (group 6) */
    public static String getSaturatesPerServ() {
        return group(6);
    }

    /** @return contents of the first SPAN after the "Carbohydrate" label (group 7) */
    public static String getCarbohydratePer100() {
        return group(7);
    }

    /** @return contents of the second SPAN after the "Carbohydrate" label (group 8) */
    public static String getCarbohydratePerServ() {
        return group(8);
    }

    /** @return contents of the first SPAN after the "Sugars" label (group 9) */
    public static String getSugarsPer100() {
        return group(9);
    }

    /** @return contents of the second SPAN after the "Sugars" label (group 10) */
    public static String getSugarsPerServ() {
        return group(10);
    }

    /** @return contents of the first SPAN after the "Fibre" label (group 11) */
    public static String getFibrePer100() {
        return group(11);
    }

    /** @return contents of the second SPAN after the "Fibre" label (group 12) */
    public static String getFibrePerServ() {
        return group(12);
    }

    /** @return contents of the first SPAN after the "Protein" label (group 13) */
    public static String getProteinPer100() {
        return group(13);
    }

    /** @return contents of the second SPAN after the "Protein" label (group 14) */
    public static String getProteinPerServ() {
        return group(14);
    }

    /** @return contents of the first SPAN after the "Salt" label (group 15) */
    public static String getSaltPer100() {
        return group(15);
    }

    /** @return contents of the second SPAN after the "Salt" label (group 16) */
    public static String getSaltPerServ() {
        return group(16);
    }
}
结果:
Group 1: 1373kJ / 329kcal
Group 2: 1717kJ / 411kcal
Group 3: 20.0g
Group 4: 25.0g
Group 5: 11.2g
Group 6: 14.0g
Group 7: 32.9g
Group 8: 41.1g
Group 9: 16.2g
Group 10: 20.2g
Group 11: 1.3g
Group 12: 1.6g
Group 13: 3.9g
Group 14: 4.9g
Group 15: 0.1g
Group 16: 0.1g