问题1。是的,有一种方法可以让Alexa查找这些PDF文件并朗读定义。Amazon Alexa支持Lambda函数,Lambda支持.NET Core,而Foxit PDF SDK 6.4可以在.NET Core中运行。Foxit PDF SDK 6.4支持在PDF中搜索关键字。您可以使用Foxit PDF SDK来搜索关键字,并尝试解析PDF中的文本数据以获得定义。
此解决方案需要适用于.NET的Foxit PDF SDK 6.4。您可以通过以下链接申请评估包:
https://developers.foxitsoftware.com/pdf-sdk/free-trial
using System;
using foxit;
using foxit.common;
using foxit.common.fxcrt;
using foxit.pdf;
/// <summary>
/// Searches every page of a PDF for <paramref name="searchTerm"/> using the Foxit PDF SDK.
/// </summary>
/// <param name="inputPDF">Path of the PDF file to search, including its .pdf extension.</param>
/// <param name="searchTerm">The term to search for.</param>
/// <returns>
/// A status string: the error code name if initializing the library or loading the
/// document fails, the exception message if an exception is thrown, or the upper-cased
/// success code ("E_ERRSUCCESS") when the search runs to completion. The matched
/// sentences themselves are not returned; collect them inside the loop if you need them.
/// </returns>
public string SearchPDF(string inputPDF, string searchTerm)
{
    string sn = "SNValue"; // the SN value provided in the evaluation package at lib\gsdk_sn.txt
    string key = "SignKeyValue"; // the Sign value provided in the evaluation package at lib\gsdk_key.txt
    ErrorCode error_code;
    try
    {
        // Unlocks the library to be used. Make sure you update the sn and key values accordingly.
        error_code = Library.Initialize(sn, key);
        if (error_code != ErrorCode.e_ErrSuccess)
        {
            return error_code.ToString();
        }

        try
        {
            // 'using' disposes the document on every exit path (early return or exception),
            // and always before the library itself is released in the finally block below.
            using (PDFDoc doc = new PDFDoc(inputPDF))
            {
                error_code = doc.Load(null); // Loads the PDF into the Foxit PDF SDK
                if (error_code != ErrorCode.e_ErrSuccess)
                {
                    return error_code.ToString(); // Returns an error code if loading the document fails
                }

                using (TextSearch search = new TextSearch(doc, null))
                {
                    // Search the whole document: first page through last page.
                    search.SetStartPage(0);
                    search.SetEndPage(doc.GetPageCount() - 1);
                    search.SetPattern(searchTerm); // Sets the term to be searched for in the PDF

                    // No search flags are set here. To specify flags, combine values of
                    // TextSearch.SearchFlags, for example:
                    //   (int)TextSearch.SearchFlags.e_SearchMatchCase
                    //   (int)TextSearch.SearchFlags.e_SearchMatchWholeWord
                    //   (int)TextSearch.SearchFlags.e_SearchConsecutive

                    int match_count = 0;
                    while (search.FindNext())
                    {
                        RectFArray rect_array = search.GetMatchRects();
                        string sentenceWithSearchTerm = search.GetMatchSentence(); // Gets the sentence with the search term
                        match_count++;
                    }
                }
            }
        }
        finally
        {
            // Release the library only after a successful Initialize, on every exit path.
            Library.Release();
        }
    }
    catch (foxit.PDFException e)
    {
        return e.Message;
    }
    catch (Exception e)
    {
        return e.Message;
    }

    // If successful this returns "E_ERRSUCCESS". Please check the SDK headers for other error codes.
    return error_code.ToString().ToUpperInvariant();
}
// Text-extraction fragment: loads the PDF at inputPDF and reads the text of each page.
// It relies on inputPDF and error_code from the enclosing scope and returns the error
// code name if the document cannot be loaded.
using (var document = new PDFDoc(inputPDF))
{
    error_code = document.Load(null);
    if (error_code != ErrorCode.e_ErrSuccess)
    {
        return error_code.ToString();
    }

    // Visit the pages one by one, from the first to the last.
    int totalPages = document.GetPageCount();
    int pageIndex = 0;
    while (pageIndex < totalPages)
    {
        using (var currentPage = document.GetPage(pageIndex))
        {
            // The page has to be parsed before a text page can be built from it.
            currentPage.StartParse((int)PDFPage.ParseFlags.e_ParsePageNormal, null, false);

            // The text page object exposes the characters of the parsed page.
            using (var pageTextReader = new TextPage(currentPage, (int)TextPage.TextParseFlags.e_ParseTextNormal))
            {
                int charCount = pageTextReader.GetCharCount();
                if (charCount > 0)
                {
                    // Gets the text on the PDF page as a plain string.
                    string pageText = pageTextReader.GetChars(0, charCount);
                }
            }
        }

        pageIndex++;
    }
}
问题3。我不知道你所说的机器可读格式是什么意思,但是Foxit PDF SDK可以提供字符串格式的文本。