Get all the text in all the slides

OpenXML SDK

class Program
    {
        /// <summary>
        /// Prints the number of slides in the sample presentation, followed by
        /// the text found on each slide, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string file = "Get all the text in a slide.pptx";
            int numberOfSlides = CountSlides(file);
            System.Console.WriteLine("Number of slides = {0}", numberOfSlides);
            string slideText;
            for (int i = 0; i < numberOfSlides; i++)
            {
                // Slides are addressed by zero-based index but reported one-based.
                GetSlideIdAndText(out slideText, file, i);
                System.Console.WriteLine("Slide #{0} contains: {1}", i + 1, slideText);
            }
            System.Console.ReadKey();
        }

        /// <summary>
        /// Opens the presentation file read-only and returns its slide count.
        /// </summary>
        /// <param name="presentationFile">Path of the presentation file to open.</param>
        /// <returns>The number of slide parts in the presentation.</returns>
        public static int CountSlides(string presentationFile)
        {
            // Open the presentation as read-only.
            using (PresentationDocument presentationDocument = PresentationDocument.Open(presentationFile, false))
            {
                // Delegate to the overload that works on an open document.
                return CountSlides(presentationDocument);
            }
        }

        /// <summary>
        /// Counts the slides in an already opened presentation.
        /// </summary>
        /// <param name="presentationDocument">The open presentation document.</param>
        /// <returns>
        /// The number of slide parts, or 0 when the document has no presentation part.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="presentationDocument"/> is null.
        /// </exception>
        public static int CountSlides(PresentationDocument presentationDocument)
        {
            // Check for a null document object.
            if (presentationDocument == null)
            {
                throw new ArgumentNullException("presentationDocument");
            }

            int slidesCount = 0;

            // Get the presentation part of document.
            PresentationPart presentationPart = presentationDocument.PresentationPart;
            // Get the slide count from the SlideParts.
            if (presentationPart != null)
            {
                slidesCount = presentationPart.SlideParts.Count();
            }
            return slidesCount;
        }

        /// <summary>
        /// Opens the presentation read-only and concatenates the text of every
        /// text run found on the slide at the given position in the slide ID list.
        /// </summary>
        /// <param name="sldText">Receives the concatenated text of the slide.</param>
        /// <param name="docName">Path of the presentation file to open.</param>
        /// <param name="index">Zero-based position of the slide in the slide ID list.</param>
        /// <exception cref="InvalidOperationException">
        /// The presentation has no slide ID list, or the entry at
        /// <paramref name="index"/> is not a slide ID.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than the number of entries
        /// in the slide ID list.
        /// </exception>
        public static void GetSlideIdAndText(out string sldText, string docName, int index)
        {
            using (PresentationDocument ppt = PresentationDocument.Open(docName, false))
            {
                // Fail with a descriptive error instead of a NullReferenceException
                // when the package has no presentation or no slide ID list.
                PresentationPart part = ppt.PresentationPart;
                if (part == null || part.Presentation == null || part.Presentation.SlideIdList == null)
                {
                    throw new InvalidOperationException("The presentation does not contain a slide ID list.");
                }

                OpenXmlElementList slideIds = part.Presentation.SlideIdList.ChildElements;
                if (index < 0 || index >= slideIds.Count)
                {
                    throw new ArgumentOutOfRangeException("index");
                }

                // Get the relationship ID of the slide at the requested position.
                SlideId slideId = slideIds[index] as SlideId;
                if (slideId == null)
                {
                    throw new InvalidOperationException("The slide ID list entry at the requested index is not a slide ID.");
                }
                string relId = slideId.RelationshipId;

                // Get the slide part from the relationship ID.
                SlidePart slide = (SlidePart)part.GetPartById(relId);

                // Collect the text of every text run on the slide; runs are
                // appended back to back with no separator.
                StringBuilder paragraphText = new StringBuilder();
                IEnumerable<A.Text> texts = slide.Slide.Descendants<A.Text>();
                foreach (A.Text text in texts)
                {
                    paragraphText.Append(text.Text);
                }
                sldText = paragraphText.ToString();
            }
        }

    }

Aspose.Slides

static void Main(string[] args)
        {
            // Sample presentation inspected by this program.
            string presentationPath = "Get all the text in a slide.pptx";

            // Report how many slides the presentation holds.
            int slideTotal = CountSlides(presentationPath);
            System.Console.WriteLine("Number of slides = {0}", slideTotal);

            // Slides are fetched by zero-based index but reported one-based.
            int slideIndex = 0;
            while (slideIndex < slideTotal)
            {
                string text = GetSlideText(presentationPath, slideIndex);
                System.Console.WriteLine("Slide #{0} contains: {1}", slideIndex + 1, text);
                slideIndex++;
            }

            // Keep the console window open until a key is pressed.
            System.Console.ReadKey();
        }
        /// <summary>
        /// Loads the PPTX file and returns the number of slides it contains.
        /// </summary>
        /// <param name="presentationFile">Path of the presentation file to load.</param>
        /// <returns>The value of the presentation's slide collection count.</returns>
        public static int CountSlides(string presentationFile)
        {
            int total;

            // PresentationEx represents the PPTX; it is disposed when the using block ends.
            using (PresentationEx presentation = new PresentationEx(presentationFile))
            {
                total = presentation.Slides.Count;
            }

            return total;
        }
        /// <summary>
        /// Loads the PPTX file and concatenates the text of every placeholder
        /// shape on the slide at the given index.
        /// </summary>
        /// <param name="docName">Path of the presentation file to load.</param>
        /// <param name="index">Zero-based index of the slide in the presentation.</param>
        /// <returns>
        /// The placeholder texts appended back to back with no separator, or an
        /// empty string when the slide has no placeholder text.
        /// </returns>
        public static string GetSlideText(string docName, int index)
        {
            // Accumulate in a builder rather than re-allocating a string per shape.
            System.Text.StringBuilder sldText = new System.Text.StringBuilder();

            //Instantiate PresentationEx class that represents PPTX
            using (PresentationEx pres = new PresentationEx(docName))
            {
                //Access the slide
                SlideEx sld = pres.Slides[index];

                //Iterate through shapes to find the placeholders
                foreach (ShapeEx shp in sld.Shapes)
                {
                    if (shp.Placeholder == null)
                    {
                        continue;
                    }

                    // A placeholder is not guaranteed to be an auto shape, and an
                    // auto shape may have no text frame; skip those instead of
                    // failing with InvalidCastException / NullReferenceException.
                    AutoShapeEx autoShape = shp as AutoShapeEx;
                    if (autoShape == null || autoShape.TextFrame == null)
                    {
                        continue;
                    }

                    //get the text of each placeholder
                    sldText.Append(autoShape.TextFrame.Text);
                }
            }
            return sldText.ToString();
        }

Download

Last edited Jan 17, 2014 at 6:24 AM by asposemarketplace, version 2