I extract the path coordinates with `page.get_drawings()` and use them to redraw exactly the same graph. I normalize the data and draw it with another library, ReportLab (the problem is not on the ReportLab side, because I tried other ways of drawing the graphs and the result was the same). There are N graphs in a single-page PDF file. I take their coordinates, put them in an array, and draw them. But for some reason the result contains small errors, and only on two of the four sides of the coordinate plane.
Example PDF:
example.pdf (33.1 KB)
result:
output_page_vt_graph.pdf (22.5 KB)
import pymupdf
from reportlab.graphics.shapes import Drawing
from reportlab.graphics.charts.lineplots import LinePlot
from reportlab.graphics.charts.axes import XValueAxis, YValueAxis
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
def extact_graphics(filepath: str) -> list[list[tuple[float, float]]]:
    """Extract the straight-line vector paths from the first page of a PDF.

    Every drawing returned by ``page.get_drawings()`` is reduced to the
    vertices of its line ("l") items. The y axis is flipped
    (``page_height - y``) and each path is shifted so that its first vertex
    becomes (0, 0).

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        One list of ``(x, y)`` tuples per drawing that contains more than
        two line segments, in the order the drawings are reported.
    """
    all_paths: list[list[tuple[float, float]]] = []
    # The context manager closes the document once extraction is finished.
    with pymupdf.open(filepath) as doc:
        page = doc[0]
        page_height = page.rect.height
        for shape in page.get_drawings():
            path: list[tuple[float, float]] = []
            segment_count = 0
            for item in shape["items"]:
                op, *coords = item
                if op != "l":
                    continue
                segment_count += 1
                start_x, start_y = coords[0]
                end_x, end_y = coords[1]
                start = (start_x, page_height - start_y)
                # A line item carries both of its end points. Keep the start
                # point too unless it merely repeats the previous end point;
                # otherwise the first vertex of the path is lost and the
                # offset below is taken from the wrong point.
                if not path or path[-1] != start:
                    path.append(start)
                path.append((end_x, page_height - end_y))
            # Same selection rule as before: only drawings with more than two
            # line segments are kept, so the indices of the result are stable.
            if segment_count <= 2:
                continue
            offset_x, offset_y = path[0]
            all_paths.append(apply_offset(path, offset_x, offset_y))
    return all_paths
def apply_offset(
    paths: list[tuple[float, float]],
    offset_x: float,
    offset_y: float,
) -> list[tuple[float, float]]:
    """Return the given points shifted by ``(-offset_x, -offset_y)``.

    Args:
        paths: Sequence of ``(x, y)`` points (despite the plural name, this
            is a single path).
        offset_x: Value subtracted from every x coordinate.
        offset_y: Value subtracted from every y coordinate.

    Returns:
        A new list of translated ``(x, y)`` tuples; the input is not modified.
    """
    return [(point[0] - offset_x, point[1] - offset_y) for point in paths]
def render_paths_to_pdf(paths: list[list[tuple[float, float]]], output_path: str) -> None:
    """Draw every path as a joined line plot on its own A4 page.

    Args:
        paths: One list of ``(x, y)`` points per graph.
        output_path: File name of the PDF that is written.
    """
    width, height = A4
    margin = 50  # gap between the page edge and the plot area on each side
    c = canvas.Canvas(output_path, pagesize=A4)
    for path in paths:
        drawing = Drawing(width, height)
        lp = LinePlot()
        lp.x = margin
        lp.y = margin
        lp.height = height - 2 * margin
        lp.width = width - 2 * margin
        lp.data = [path]
        lp.joinedLines = True
        lp.xValueAxis = XValueAxis()
        lp.yValueAxis = YValueAxis()
        drawing.add(lp)
        drawing.drawOn(c, 0, 0)
        # Finish the current page so the next path starts on a fresh one.
        c.showPage()
    c.save()
if __name__ == "__main__":
    # Script entry point: read the sample PDF and re-render its graphs.
    fn = "example.pdf"
    result = extact_graphics(fn)
    render_paths_to_pdf(
        # Drop the first two elements: the first graphic was split into
        # two parts.
        result[2:],
        "output_page_vt_graph.pdf",
    )