diff --git a/page.go b/page.go
index 9c7d688..424b9ef 100644
--- a/page.go
+++ b/page.go
@@ -403,7 +403,23 @@ type gstate struct {
 
 // Content returns the page's content.
 func (p Page) Content() Content {
-	strm := p.V.Key("Contents")
+	switch v := p.V.Key("Contents"); v.Kind() {
+	case Stream:
+		return p.contentForStream(v)
+	case Array:
+		var c Content
+		for i := 0; i < v.Len(); i++ {
+			cfs := p.contentForStream(v.Index(i))
+			c.Text = append(c.Text, cfs.Text...)
+			c.Rect = append(c.Rect, cfs.Rect...)
+		}
+		return c
+	default:
+		panic("bad content kind")
+	}
+}
+
+func (p Page) contentForStream(strm Value) Content {
 	var enc TextEncoding = &nopEncoder{}
 
 	var g = gstate{
@@ -484,9 +500,12 @@ func (p Page) Content() Content {
 			gstack = append(gstack, g)
 
 		case "Q": // restore graphics state
-			n := len(gstack) - 1
-			g = gstack[n]
-			gstack = gstack[:n]
+			// gstack should not be empty...but sometimes it is
+			if len(gstack) > 0 {
+				n := len(gstack) - 1
+				g = gstack[n]
+				gstack = gstack[:n]
+			}
 
 		case "BT": // begin text (reset text matrix and line matrix)
 			g.Tm = ident
diff --git a/read.go b/read.go
index 6fe41b4..bfd5401 100644
--- a/read.go
+++ b/read.go
@@ -67,6 +67,7 @@ import (
 	"crypto/cipher"
 	"crypto/md5"
 	"crypto/rc4"
+	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -123,13 +124,11 @@ func NewReader(f io.ReaderAt, size int64) (*Reader, error) {
 	const endChunk = 100
 	buf = make([]byte, endChunk)
 	f.ReadAt(buf, end-endChunk)
-	for len(buf) > 0 && buf[len(buf)-1] == '\n' || buf[len(buf)-1] == '\r' {
-		buf = buf[:len(buf)-1]
-	}
-	buf = bytes.TrimRight(buf, "\r\n\t ")
-	if !bytes.HasSuffix(buf, []byte("%%EOF")) {
-		return nil, fmt.Errorf("not a PDF file: missing %%%%EOF")
+	eof := bytes.LastIndex(buf, []byte("%%EOF"))
+	if eof == -1 {
+		return nil, errors.New("not a PDF file: missing %%EOF")
 	}
+	buf = buf[:eof]
 	i := findLastLine(buf, "startxref")
 	if i < 0 {
 		return nil, fmt.Errorf("malformed PDF file: missing final startxref")
diff --git a/valuekind_string.go b/valuekind_string.go
new file mode 100644
index 0000000..006be9c
--- /dev/null
+++ b/valuekind_string.go
@@ -0,0 +1,21 @@
+// generated by stringer -type=ValueKind; DO NOT EDIT
+
+package pdf
+
+import "fmt"
+
+const _ValueKind_name = "NullBoolIntegerRealStringNameDictArrayStream"
+
+var _ValueKind_index = [...]uint8{4, 8, 15, 19, 25, 29, 33, 38, 44}
+
+func (i ValueKind) String() string {
+	if i < 0 || i >= ValueKind(len(_ValueKind_index)) {
+		return fmt.Sprintf("ValueKind(%d)", i)
+	}
+	hi := _ValueKind_index[i]
+	lo := uint8(0)
+	if i > 0 {
+		lo = _ValueKind_index[i-1]
+	}
+	return _ValueKind_name[lo:hi]
+}