Tuesday, January 21, 2014

Read an MS Word document in PHP.

<?php
// Upload form for a Word document. The form posts back to this same script.
// The action URL is echoed (the original expression produced no output) and
// HTML-escaped because PHP_SELF can contain client-supplied path info.
// MAX_FILE_SIZE is only a browser-side hint (in bytes) and must precede the
// file input; the server-side limits still apply.
?>
<form action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>" method="post" enctype="multipart/form-data">
                <label for="file">File Name:</label>
                <input type="hidden" name="MAX_FILE_SIZE" value="100000" />
                <input type="file" name="file" id="file" accept=".docx">
                <input type="submit" name="submit" value="Submit">
</form>

<?php
/**
 * Extract the plain text of a .docx file.
 *
 * A .docx file is a ZIP archive whose main body is stored in the entry
 * "word/document.xml". This reads that entry, turns table-cell boundaries
 * into spaces and paragraph ends into CRLF line breaks, and strips all
 * remaining XML tags.
 *
 * @param string $path Path of the .docx file on the local filesystem.
 *
 * @return string|false The extracted text, or false when the path is empty,
 *                      is not a file, cannot be opened as a ZIP archive, or
 *                      has no "word/document.xml" entry.
 */
function read_docx_text($path)
{
    if (!$path || !is_file($path) || !class_exists('ZipArchive')) {
        return false;
    }

    // ZipArchive replaces the procedural zip_open()/zip_read() API, which is
    // deprecated as of PHP 8.0.
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        return false;
    }

    $xml = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($xml === false) {
        return false;
    }

    // Order matters: cell boundaries must be replaced before the generic
    // paragraph-end pattern, which is a prefix of the cell pattern.
    $xml = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $xml);
    $xml = str_replace('</w:r></w:p>', "\r\n", $xml);

    return strip_tags($xml);
}

if (isset($_POST['submit'])) {
    $upload = isset($_FILES['file']) && is_array($_FILES['file']) ? $_FILES['file'] : null;

    $uploadOk = $upload !== null
        && isset($upload['error'], $upload['tmp_name'])
        && $upload['error'] === UPLOAD_ERR_OK
        && is_uploaded_file($upload['tmp_name']);

    if (!$uploadOk) {
        echo 'Upload failed. Please choose a .docx file and try again.';
    } else {
        // Read the temporary file PHP stored on the server. The 'name' field
        // is only the client-side filename and is not a readable path here.
        $text = read_docx_text($upload['tmp_name']);

        if ($text === false) {
            echo 'The uploaded file could not be read as a Word (.docx) document.';
        } else {
            // Tags are stripped; XML entities are left encoded, so the text
            // is emitted as-is into the HTML page.
            echo $text;
        }
    }
}
?>