您可以遍历文档中的所有表格,并检查每个表格第一个单元格中的文本。我修改了输出,使其返回一个数据帧(DataFrame)列表,以防找到多个符合条件的表格。如果没有符合条件的表格,它将返回一个空列表。
def make_dataframe(f_name, first_cell_string='tag number'):
    """Build one DataFrame per matching table in a Word document.

    A table matches when the text of its top-left cell, lower-cased and
    stripped of surrounding whitespace, equals ``first_cell_string``
    normalized the same way.  The first row of each matching table supplies
    the column names; every following row becomes one record.

    Args:
        f_name: Passed straight to ``Document`` to open the document.
        first_cell_string: Text expected in the first cell of a wanted
            table.  Compared case-insensitively, ignoring surrounding
            whitespace.

    Returns:
        A list with one ``pandas.DataFrame`` per matching table, in the
        order ``document.tables`` yields them.  The list is empty when no
        table matches.
    """
    # Normalize the search string the same way the cell text is normalized,
    # so a caller passing e.g. 'Tag Number' still finds the table.
    wanted = first_cell_string.lower().strip()

    document = Document(f_name)
    tables = [t for t in document.tables
              if t.cell(0, 0).text.lower().strip() == wanted]

    # A list is returned because more than one table may match.
    out = []
    for table in tables:
        # Fresh per table: records of one table must not leak into the next.
        data = []
        keys = ()
        for i, row in enumerate(table.rows):
            text = (cell.text for cell in row.cells)
            if i == 0:
                # Header row: its cell texts become the record keys.
                keys = tuple(text)
                continue
            # NOTE: repeated header texts collapse to a single key here;
            # the last cell with that header wins.
            data.append(dict(zip(keys, text)))
        out.append(pd.DataFrame.from_dict(data))
    return out